From 7aa77836a545bddaddc5860393fd090569690dfe Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sun, 5 Apr 2026 10:59:33 -0400 Subject: [PATCH 01/17] feat: Add agent instructions, OpenAI-compatible providers, and fix window drag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `instructions`/`instructionsFile` fields on Profile with `resolveInstructions()` helper; injected as ARC_AGENT_INSTRUCTIONS env var at launch time - Add `arc instructions` CLI (show/set/edit/clear) for managing per-profile system prompts - Add `openai-compat` auth type and `ProviderConfig` (baseUrl, model, apiKeyEnvVar) on Profile - Add OpenAI Compatible adapter with full lifecycle (spawn, terminate, health, output) - Add `arc provider` CLI (set/show/clear/presets) with 7 presets: OpenRouter, Ollama, LM Studio, Together AI, Groq, MiniMax, DeepSeek - Fix TUI window drag on Windows by removing unused mouse tracking ANSI sequences - Update docs: CLAUDE.md, FEATURES.md, getting-started.md, authentication.md - Update tests: adapter count 5→6, resolveInstructions tests, openai-compat adapter test Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 4 +- FEATURES.md | 5 +- packages/adapter-claude/src/auth.ts | 6 + packages/cli/src/adapters/index.ts | 126 ++++++++++++++ packages/cli/src/cli.ts | 87 +++++++++- packages/cli/src/commands/instructions.ts | 141 ++++++++++++++++ packages/cli/src/commands/launch.ts | 15 +- packages/cli/src/commands/provider.ts | 188 +++++++++++++++++++++ packages/cli/src/tui/render.tsx | 6 +- packages/core/src/config.ts | 22 ++- packages/core/src/index.ts | 1 + packages/core/src/types.ts | 20 ++- tests/integration/adapter-registry.test.ts | 12 +- tests/unit/resolve-profile.test.ts | 52 +++++- user-docs/guide/authentication.md | 30 ++++ user-docs/guide/getting-started.md | 37 ++++ 16 files changed, 738 insertions(+), 14 deletions(-) create mode 100644 packages/cli/src/commands/instructions.ts create mode 100644 
packages/cli/src/commands/provider.ts diff --git a/CLAUDE.md b/CLAUDE.md index b0c9af7..3960a26 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -30,7 +30,9 @@ ARC (Agent Runtime Control) is a CLI + TUI for managing multiple agent profiles - **Deployment:** Root `Dockerfile` + `nginx.conf` — multi-stage build merging `site/` at `/` and `user-docs/` at `/docs/` into single nginx container - **Web Dashboard:** 13 view components (Overview, Sessions, Traces, Risk, Tasks, Skills, Memory, Agents, Factory + Profiles, Diagnostics, Sync, Plugins) - **Orchestration layer:** Hook pipeline (8 hooks in priority order), roundtable multi-agent discussions, task delegation protocol, interagent routing, source classification -- **Adapters:** Claude Code (SDK + plugin + hooks), Codex CLI, Gemini CLI, OpenClaw (native plugin), Hermes Agent (MCP bridge), Generic (fallback for any tool) +- **Adapters:** Claude Code (SDK + plugin + hooks), Codex CLI, Gemini CLI, OpenClaw (native plugin), Hermes Agent (MCP bridge), OpenAI Compatible (custom providers), Generic (fallback for any tool) +- **Agent instructions:** `instructions` / `instructionsFile` fields on Profile; resolved at launch, injected as `ARC_AGENT_INSTRUCTIONS` env var; `arc instructions` CLI for show/set/edit/clear +- **Custom providers:** `openai-compat` auth type + `ProviderConfig` (baseUrl, model, apiKeyEnvVar) on Profile; 7 presets (OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, DeepSeek); `arc provider` CLI for set/show/clear/presets ## Key Conventions diff --git a/FEATURES.md b/FEATURES.md index 248174d..4a1869a 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -27,6 +27,8 @@ Tracking file for planned features, enhancements, and ideas. 
Checked items are s - [x] **Tool-adapter architecture** — `RuntimeAdapter` interface with lifecycle methods; Claude, Codex, Gemini, OpenClaw, and Generic adapters (Phase 2, 5-6) - [x] **Profile inheritance** — `inherits` field + `resolveProfile()` engine for base + override resolution (Phase 9) - [x] **Project-local config** (`arc.json`) — preferred tool, profile, workspace overrides per repo (Phase 9) +- [x] **Agent instructions** — `instructions` / `instructionsFile` fields on Profile, resolved at launch, injected as `ARC_AGENT_INSTRUCTIONS` env var; `arc instructions` CLI (show/set/edit/clear) +- [x] **OpenAI-compatible providers** — `openai-compat` auth type + `ProviderConfig` on Profile (baseUrl, model, apiKeyEnvVar); 7 presets (OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, DeepSeek); `arc provider` CLI (set/show/clear/presets) - [ ] **Team/shared config** — repo-checked config with local secret overlays - [ ] **Backup/export/import** — move profiles and settings between machines - [x] **Managed updates** — self-update system with npm registry check and TUI update banner @@ -53,7 +55,8 @@ Tracking file for planned features, enhancements, and ideas. 
Checked items are s - [x] OpenClaw adapter (plugin manifest, RuntimeAdapter, 3 lifecycle hooks) - [x] Hermes Agent adapter (MCP bridge, lifecycle, process management) - [x] Generic adapter factory (fallback for any unknown tool, health monitoring) -- [x] 48 adapter registry + generic adapter tests +- [x] OpenAI Compatible adapter (custom provider endpoints, 7 presets) +- [x] 50+ adapter registry + generic adapter tests ### Logging & Lifecycle (Phases 3-4) - [x] Structured JSONL log at `~/.arc/logs/structured.jsonl` diff --git a/packages/adapter-claude/src/auth.ts b/packages/adapter-claude/src/auth.ts index 722e8e5..1af6164 100644 --- a/packages/adapter-claude/src/auth.ts +++ b/packages/adapter-claude/src/auth.ts @@ -123,6 +123,12 @@ export async function getClaudeCredentialStatus(profile: Profile): Promise false); + return { + authenticated: hasKey, + authType: "openai-compat", + method: "api-key", + }; + } } }, async buildProfileEnv(profile: Profile): Promise> { @@ -181,6 +192,11 @@ function createBasicAdapter(config: { if (config.configEnvVar) { env[config.configEnvVar] = profile.configDir; } + // Inject provider config as env vars for OpenAI-compatible endpoints + if (profile.provider) { + if (profile.provider.baseUrl) env["OPENAI_BASE_URL"] = profile.provider.baseUrl; + if (profile.provider.model) env["OPENAI_MODEL"] = profile.provider.model; + } for (const [key, value] of Object.entries(profile.envOverrides ?? 
{})) { env[key] = value; } @@ -590,12 +606,122 @@ const codexAdapter = createBasicAdapter({ lifecycle: codexLifecycle, }); +// ─── OpenAI-compatible adapter ────────────────────────────────────── + +const openaiCompatProcessHandles = new Map(); + +const openaiCompatLifecycle: LifecycleOverrides = { + async launch(profile: Profile, options: LaunchOptions): Promise { + // Determine binary: use envOverrides.OPENAI_COMPAT_BINARY or fall back to "codex" (|| is used, so an empty-string override also falls back) + const binary = profile.envOverrides?.["OPENAI_COMPAT_BINARY"] || "codex"; + const args = [...options.args]; + + // Build the child env: process.env overlaid with options.env, then OPENAI_BASE_URL / OPENAI_MODEL from the profile's provider config when set + const env: NodeJS.ProcessEnv = { ...process.env, ...options.env }; + + if (profile.provider) { + if (profile.provider.baseUrl) { + env["OPENAI_BASE_URL"] = profile.provider.baseUrl; + } + if (profile.provider.model) { + // Prepend "--model <id>" to the args (Codex-style flag); also export OPENAI_MODEL for tools that read the env var instead + args.unshift("--model", profile.provider.model); + env["OPENAI_MODEL"] = profile.provider.model; + } + } + + if (options.beforeSpawn) { + await options.beforeSpawn(); + } + + const command = process.platform === "win32" ? "cmd" : binary; // on win32 the binary is run through "cmd /c" instead of being spawned directly + const spawnArgs = process.platform === "win32" ? 
["/c", binary, ...args] : args; + + const handle = spawnManagedProcess({ + command, + args: spawnArgs, + env, + cwd: options.cwd, + component: "openai-compat", + }); + + openaiCompatProcessHandles.set(handle.pid, handle); + handle.child.once("exit", () => { + openaiCompatProcessHandles.delete(handle.pid); + }); + + return { + pid: handle.pid, + tool: "openai-compat", + profile: "default", // NOTE(review): always the literal "default", whatever profile was passed in — confirm intended + startedAt: new Date(), + }; + }, + + async terminate(agentProcess: AgentProcess): Promise { + openaiCompatProcessHandles.delete(agentProcess.pid); + await terminateProcess(agentProcess.pid, "openai-compat"); + }, + + isRunning(agentProcess: AgentProcess): boolean { + const alive = isProcessRunning(agentProcess.pid); + writeLogEvent({ + level: "debug", + component: "openai-compat", + action: alive ? 
"process:alive" : "process:dead", + message: `pid=${agentProcess.pid}`, + data: { pid: agentProcess.pid }, + }); + return alive; + }, + + onOutput(agentProcess: AgentProcess, handler: (event: OutputEvent) => void): void { + const handle = openaiCompatProcessHandles.get(agentProcess.pid); + if (!handle?.child.stdout) return; + + const rl = createInterface({ input: handle.child.stdout }); + rl.on("line", (line) => { + handler({ + type: "raw", + content: line, + timestamp: new Date(), + }); + }); + }, +}; + +const openaiCompatAdapter = createBasicAdapter({ + id: "openai-compat", + displayName: "OpenAI Compatible", + dirName: ".openai-compat", + markerFiles: ["config.json", ".env"], + installHint: + "Configure with: arc create --tool openai-compat --auth-type openai-compat\n" + + "Then set provider: arc provider set --base-url --model ", + configEnvVar: "OPENAI_COMPAT_HOME", + capabilities: { + hooks: false, + sdkControl: false, + pluginSystem: false, + mcpSupport: false, + jsonOutput: false, + sandboxing: false, + processWrap: true, + remoteSupport: false, + permissionTier: "interactive", + }, + lifecycle: openaiCompatLifecycle, +}); + +// ─── Adapter registry ─────────────────────────────────────────────── + const adapters = new Map([ [claudeAdapter.id, claudeAdapter], [geminiAdapter.id, geminiAdapter], [codexAdapter.id, codexAdapter], [openclawAdapter.id, openclawAdapter], [hermesAdapter.id, hermesAdapter], + [openaiCompatAdapter.id, openaiCompatAdapter], ]); export function listAdapters(): RuntimeAdapter[] { diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 3347e92..eeccf60 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -51,7 +51,7 @@ export function createProgram(): Command { .description("Create a new profile") .option( "--auth-type ", - "Auth type (oauth, api-key, bedrock, vertex, foundry)" + "Auth type (oauth, api-key, bedrock, vertex, foundry, openai-compat)" ) .option("--tool ", "Agent tool binary (claude, gemini, 
codex, ...)") .option("--description ", "Profile description") @@ -360,7 +360,7 @@ Examples: .description("Create a new profile") .option( "--auth-type ", - "Auth type (oauth, api-key, bedrock, vertex, foundry)" + "Auth type (oauth, api-key, bedrock, vertex, foundry, openai-compat)" ) .option("--tool ", "Agent tool binary (claude, gemini, codex, ...)") .option("--description ", "Profile description") @@ -453,6 +453,89 @@ Examples: showInfo("These flags will be prepended on every `arc launch`."); }); + // === Agent Instructions === + + const instructions = program + .command("instructions") + .alias("inst") + .description("Manage agent instructions / system prompts per profile"); + + instructions + .command("show [name]") + .description("Show resolved instructions for a profile (default: active)") + .action(async (name?: string) => { + const mod = await import("./commands/instructions.js"); + await mod.handleInstructionsShow(name); + }); + + instructions + .command("set ") + .description("Set inline instructions for a profile") + .option("--from-file ", "Read instructions from a file instead of inline") + .option("--file ", "Set instructionsFile path (read at launch time)") + .action(async (name: string, opts: { fromFile?: string; file?: string }) => { + const mod = await import("./commands/instructions.js"); + await mod.handleInstructionsSet(name, opts); + }); + + instructions + .command("edit ") + .description("Open instructions in $EDITOR") + .action(async (name: string) => { + const mod = await import("./commands/instructions.js"); + await mod.handleInstructionsEdit(name); + }); + + instructions + .command("clear ") + .description("Remove instructions from a profile") + .action(async (name: string) => { + const mod = await import("./commands/instructions.js"); + await mod.handleInstructionsClear(name); + }); + + // === Provider Configuration === + + const provider = program + .command("provider") + .description("Configure custom OpenAI-compatible providers for 
profiles"); + + provider + .command("set ") + .description("Set provider config on a profile") + .option("--base-url ", "API base URL (e.g. https://openrouter.ai/api/v1)") + .option("--model ", "Model identifier (e.g. anthropic/claude-3.5-sonnet)") + .option("--api-key-var ", "Env var name for the API key (default: OPENAI_API_KEY)") + .option("--display-name ", "Provider display name (e.g. OpenRouter, Ollama)") + .action(async (name: string, opts: { baseUrl?: string; model?: string; apiKeyVar?: string; displayName?: string }) => { + const mod = await import("./commands/provider.js"); + await mod.handleProviderSet(name, opts); + }); + + provider + .command("show [name]") + .description("Show provider config for a profile (default: active)") + .action(async (name?: string) => { + const mod = await import("./commands/provider.js"); + await mod.handleProviderShow(name); + }); + + provider + .command("clear ") + .description("Remove provider config from a profile") + .action(async (name: string) => { + const mod = await import("./commands/provider.js"); + await mod.handleProviderClear(name); + }); + + provider + .command("presets") + .description("List known provider presets (OpenRouter, Ollama, LM Studio, etc.)") + .action(async () => { + const mod = await import("./commands/provider.js"); + await mod.handleProviderPresets(); + }); + // === Shared Layer === const shared = program diff --git a/packages/cli/src/commands/instructions.ts b/packages/cli/src/commands/instructions.ts new file mode 100644 index 0000000..ee1a567 --- /dev/null +++ b/packages/cli/src/commands/instructions.ts @@ -0,0 +1,141 @@ +import fs from "node:fs"; +import path from "node:path"; +import { spawnSync } from "node:child_process"; +import { loadConfig, saveConfig } from "../config.js"; +import { resolveInstructions } from "@axiom-labs/arc-core"; +import { success, error, info, warn } from "../display.js"; + +function getProfile(name?: string) { + const config = loadConfig(); + const profileName = 
name ?? config.activeProfile; + const profile = config.profiles[profileName]; + if (!profile) { + error(`Profile "${profileName}" not found.`); + process.exit(1); + } + return { config, profileName, profile }; +} + +export async function handleInstructionsShow(name?: string): Promise { + const { profileName, profile } = getProfile(name); + const text = resolveInstructions(profile); + + if (!text) { + info(`No instructions configured for "${profileName}".`); + info("Set with: arc instructions set --from-file ./INSTRUCTIONS.md"); + return; + } + + const source = profile.instructionsFile ? `file: ${profile.instructionsFile}` : "inline"; + info(`Instructions for "${profileName}" (${source}, ${text.length} chars):`); + console.log(); + console.log(text); +} + +export async function handleInstructionsSet( + name: string, + opts: { fromFile?: string; file?: string }, +): Promise { + const { config, profileName, profile } = getProfile(name); + + if (opts.file) { + // Set instructionsFile path (lazy-loaded at launch time) + const resolved = path.resolve(opts.file); + if (!fs.existsSync(resolved)) { + warn(`File "${resolved}" does not exist yet — it will be read at launch time.`); + } + profile.instructionsFile = resolved; + delete profile.instructions; + saveConfig(config); + success(`Instructions file for "${profileName}" set to: ${resolved}`); + return; + } + + if (opts.fromFile) { + // Read file and store as inline instructions + const resolved = path.resolve(opts.fromFile); + try { + const text = fs.readFileSync(resolved, "utf-8"); + profile.instructions = text; + delete profile.instructionsFile; + saveConfig(config); + success(`Inline instructions for "${profileName}" set from: ${resolved} (${text.length} chars)`); + } catch (err: unknown) { + const msg = err instanceof Error ? 
err.message : String(err); + error(`Failed to read "${resolved}": ${msg}`); + process.exit(1); + } + return; + } + + // Interactive: read from stdin + info("Enter instructions (paste text, then press Ctrl+D when done):"); + const chunks: Buffer[] = []; + for await (const chunk of process.stdin) { + chunks.push(chunk as Buffer); + } + const text = Buffer.concat(chunks).toString("utf-8").trim(); + + if (!text) { + error("No instructions provided."); + process.exit(1); + } + + profile.instructions = text; + delete profile.instructionsFile; + saveConfig(config); + success(`Inline instructions for "${profileName}" set (${text.length} chars).`); +} + +export async function handleInstructionsEdit(name: string): Promise { + const { config, profileName, profile } = getProfile(name); + + const editor = process.env["EDITOR"] || process.env["VISUAL"] || (process.platform === "win32" ? "notepad" : "vi"); + + // Write current instructions to a temp file + const tmpDir = path.join(profile.configDir, ".arc-tmp"); + fs.mkdirSync(tmpDir, { recursive: true }); + const tmpFile = path.join(tmpDir, "instructions.md"); + + const existing = resolveInstructions(profile) ?? 
""; + fs.writeFileSync(tmpFile, existing, "utf-8"); + + info(`Opening ${editor}...`); + const result = spawnSync(editor, [tmpFile], { stdio: "inherit" }); + + if (result.status !== 0) { + error(`Editor exited with code ${result.status}.`); + try { fs.unlinkSync(tmpFile); fs.rmdirSync(tmpDir); } catch { /* ignore */ } + process.exit(1); + } + + const updated = fs.readFileSync(tmpFile, "utf-8").trim(); + try { fs.unlinkSync(tmpFile); fs.rmdirSync(tmpDir); } catch { /* ignore */ } + + if (!updated) { + profile.instructions = undefined; + profile.instructionsFile = undefined; + saveConfig(config); + info(`Instructions cleared for "${profileName}".`); + return; + } + + profile.instructions = updated; + delete profile.instructionsFile; + saveConfig(config); + success(`Instructions for "${profileName}" updated (${updated.length} chars).`); +} + +export async function handleInstructionsClear(name: string): Promise { + const { config, profileName, profile } = getProfile(name); + + if (!profile.instructions && !profile.instructionsFile) { + info(`No instructions configured for "${profileName}".`); + return; + } + + profile.instructions = undefined; + profile.instructionsFile = undefined; + saveConfig(config); + success(`Instructions cleared for "${profileName}".`); +} diff --git a/packages/cli/src/commands/launch.ts b/packages/cli/src/commands/launch.ts index 2001833..f43c6ed 100644 --- a/packages/cli/src/commands/launch.ts +++ b/packages/cli/src/commands/launch.ts @@ -3,7 +3,7 @@ import fs from "node:fs"; import path from "node:path"; import { loadConfig } from "../config.js"; import { buildProfileEnv } from "../auth.js"; -import { resolveEffectiveProfile } from "@axiom-labs/arc-core"; +import { resolveEffectiveProfile, resolveInstructions } from "@axiom-labs/arc-core"; import { error, info, warn, cmd } from "../display.js"; import { logAction } from "../log.js"; import { getAdapter } from "../adapters/index.js"; @@ -211,6 +211,19 @@ export async function handleLaunch( 
const profileEnv = await buildProfileEnv(profile, profileName); + // ─── Resolve and inject agent instructions ───────────────────────── + const instructionsText = resolveInstructions(profile); + if (instructionsText) { + profileEnv["ARC_AGENT_INSTRUCTIONS"] = instructionsText; + writeLogEvent({ + level: "info", + component: "launch", + action: "instructions:resolved", + message: `Agent instructions loaded (${instructionsText.length} chars)`, + data: { profile: profileName, source: profile.instructionsFile ? "file" : "inline" }, + }); + } + if (!findBinary(tool)) { error(`Binary "${tool}" not found on PATH.`); warn(getInstallHint(tool)); diff --git a/packages/cli/src/commands/provider.ts b/packages/cli/src/commands/provider.ts new file mode 100644 index 0000000..b128dc6 --- /dev/null +++ b/packages/cli/src/commands/provider.ts @@ -0,0 +1,188 @@ +import { loadConfig, saveConfig } from "../config.js"; +import { success, error, info, cmd } from "../display.js"; +import type { ProviderConfig } from "@axiom-labs/arc-core"; + +// ─── Known provider presets ───────────────────────────────────────── + +interface ProviderPreset { + id: string; + displayName: string; + baseUrl: string; + apiKeyEnvVar: string; + models: string[]; + notes: string; +} + +const PRESETS: ProviderPreset[] = [ + { + id: "openrouter", + displayName: "OpenRouter", + baseUrl: "https://openrouter.ai/api/v1", + apiKeyEnvVar: "OPENROUTER_API_KEY", + models: ["anthropic/claude-sonnet-4", "openai/gpt-4o", "google/gemini-2.5-pro", "meta-llama/llama-4-maverick"], + notes: "Multi-provider gateway — use any model from any provider", + }, + { + id: "ollama", + displayName: "Ollama", + baseUrl: "http://localhost:11434/v1", + apiKeyEnvVar: "OLLAMA_API_KEY", + models: ["llama3", "codellama", "mistral", "deepseek-coder"], + notes: "Local models — no API key needed (set dummy value)", + }, + { + id: "lm-studio", + displayName: "LM Studio", + baseUrl: "http://localhost:1234/v1", + apiKeyEnvVar: "LM_STUDIO_API_KEY", + 
models: ["loaded-model"], + notes: "Local inference — no API key needed (set dummy value)", + }, + { + id: "together", + displayName: "Together AI", + baseUrl: "https://api.together.xyz/v1", + apiKeyEnvVar: "TOGETHER_API_KEY", + models: ["meta-llama/Llama-3-70b-chat-hf", "mistralai/Mixtral-8x7B-Instruct-v0.1"], + notes: "Cloud GPU inference for open models", + }, + { + id: "groq", + displayName: "Groq", + baseUrl: "https://api.groq.com/openai/v1", + apiKeyEnvVar: "GROQ_API_KEY", + models: ["llama-3.3-70b-versatile", "mixtral-8x7b-32768"], + notes: "Ultra-fast inference on custom LPU hardware", + }, + { + id: "minimax", + displayName: "MiniMax", + baseUrl: "https://api.minimax.chat/v1", + apiKeyEnvVar: "MINIMAX_API_KEY", + models: ["MiniMax-Text-01", "abab6.5s-chat"], + notes: "MiniMax cloud models", + }, + { + id: "deepseek", + displayName: "DeepSeek", + baseUrl: "https://api.deepseek.com/v1", + apiKeyEnvVar: "DEEPSEEK_API_KEY", + models: ["deepseek-chat", "deepseek-coder"], + notes: "DeepSeek coding-focused models", + }, +]; + +// ─── Helpers ──────────────────────────────────────────────────────── + +function getProfile(name?: string) { + const config = loadConfig(); + const profileName = name ?? 
config.activeProfile; + const profile = config.profiles[profileName]; + if (!profile) { + error(`Profile "${profileName}" not found.`); + process.exit(1); + } + return { config, profileName, profile }; +} + +// ─── Handlers ─────────────────────────────────────────────────────── + +export async function handleProviderSet( + name: string, + opts: { baseUrl?: string; model?: string; apiKeyVar?: string; displayName?: string }, +): Promise { + const { config, profileName, profile } = getProfile(name); + + if (!opts.baseUrl && !opts.model && !opts.apiKeyVar && !opts.displayName) { + error("Provide at least one option: --base-url, --model, --api-key-var, or --display-name"); + info(`Example: ${cmd("arc provider set " + profileName + " --base-url https://openrouter.ai/api/v1 --model anthropic/claude-sonnet-4")}`); + process.exit(1); + } + + // Match a preset by exact --base-url, or by --display-name lower-cased being equal to the preset id. NOTE(review): names such as "LM Studio" or "Together AI" lower-case to strings that differ from the ids "lm-studio" / "together", so they will not match — confirm intended + const preset = PRESETS.find((p) => + opts.baseUrl === p.baseUrl || opts.displayName?.toLowerCase() === p.id, + ); + + const existing: ProviderConfig = profile.provider ?? 
{ baseUrl: "" }; + + if (opts.baseUrl) existing.baseUrl = opts.baseUrl; + if (opts.model) existing.model = opts.model; + if (opts.apiKeyVar) existing.apiKeyEnvVar = opts.apiKeyVar; + if (opts.displayName) existing.displayName = opts.displayName; + + // Apply preset defaults: fill apiKeyEnvVar / displayName only when neither the flag nor the stored provider config already supplies one + if (preset) { + if (!opts.apiKeyVar && !existing.apiKeyEnvVar) existing.apiKeyEnvVar = preset.apiKeyEnvVar; + if (!opts.displayName && !existing.displayName) existing.displayName = preset.displayName; + } + + profile.provider = existing; + + // Switch authType to openai-compat unless it is already openai-compat or api-key (api-key profiles are deliberately left unchanged) + if (profile.authType !== "openai-compat" && profile.authType !== "api-key") { + profile.authType = "openai-compat"; + } + + saveConfig(config); + + const label = existing.displayName ?? 
"Custom Provider"; + success(`Provider for "${profileName}" configured: ${label}`); + if (existing.baseUrl) info(` Base URL: ${existing.baseUrl}`); + if (existing.model) info(` Model: ${existing.model}`); + if (existing.apiKeyEnvVar) info(` API key env var: ${existing.apiKeyEnvVar}`); + + // Hint about setting the key + const keyVar = existing.apiKeyEnvVar ?? "OPENAI_API_KEY"; + info(`\nSet your API key: ${cmd(`arc set-key ${profileName}`)}`); + info(`Or via env: ${cmd(`export ${keyVar}=sk-...`)}`); +} + +export async function handleProviderShow(name?: string): Promise { + const { profileName, profile } = getProfile(name); + const p = profile.provider; + + if (!p) { + info(`No provider configured for "${profileName}".`); + info(`Set one with: ${cmd(`arc provider set ${profileName} --base-url `)}`); + info(`Or use a preset: ${cmd("arc provider presets")}`); + return; + } + + const label = p.displayName ?? "Custom Provider"; + info(`Provider for "${profileName}": ${label}`); + console.log(` Base URL: ${p.baseUrl || "(not set)"}`); + console.log(` Model: ${p.model || "(not set)"}`); + console.log(` API key var: ${p.apiKeyEnvVar || "OPENAI_API_KEY"}`); +} + +export async function handleProviderClear(name: string): Promise { + const { config, profileName, profile } = getProfile(name); + + if (!profile.provider) { + info(`No provider configured for "${profileName}".`); + return; + } + + profile.provider = undefined; + saveConfig(config); + success(`Provider cleared for "${profileName}".`); +} + +export async function handleProviderPresets(): Promise { + info("Known provider presets:\n"); + + for (const p of PRESETS) { + console.log(` ${p.displayName.padEnd(14)} ${p.baseUrl}`); + console.log(` ${"".padEnd(14)} Key var: ${p.apiKeyEnvVar}`); + console.log(` ${"".padEnd(14)} Models: ${p.models.slice(0, 3).join(", ")}`); + console.log(` ${"".padEnd(14)} ${p.notes}`); + console.log(); + } + + info("Quick setup example:"); + console.log(` ${cmd("arc create openrouter --tool 
openai-compat --auth-type openai-compat")}`); + console.log(` ${cmd("arc provider set openrouter --base-url https://openrouter.ai/api/v1 --model anthropic/claude-sonnet-4")}`); + console.log(` ${cmd("arc set-key openrouter")}`); + console.log(` ${cmd("arc launch openrouter")}`); +} diff --git a/packages/cli/src/tui/render.tsx b/packages/cli/src/tui/render.tsx index 9acd986..e5d1681 100644 --- a/packages/cli/src/tui/render.tsx +++ b/packages/cli/src/tui/render.tsx @@ -13,13 +13,11 @@ export function markLaunchPending(): void { const ALT_BUFFER_ON = "\x1b[?1049h"; const ALT_BUFFER_OFF = "\x1b[?1049l"; -const MOUSE_ON = "\x1b[?1000h\x1b[?1006h"; -const MOUSE_OFF = "\x1b[?1006l\x1b[?1000l"; const CURSOR_SHOW = "\x1b[?25h"; function restoreTerminal(): void { try { - process.stdout.write(MOUSE_OFF + ALT_BUFFER_OFF + CURSOR_SHOW); + process.stdout.write(ALT_BUFFER_OFF + CURSOR_SHOW); } catch { // stdout may already be closed during teardown } @@ -53,7 +51,7 @@ export async function renderDashboard(): Promise { await withLifecycleScope({ component: "tui" }, async (scope) => { scope.registerCleanup(restoreTerminal); - process.stdout.write(ALT_BUFFER_ON + MOUSE_ON); + process.stdout.write(ALT_BUFFER_ON); writeLogEvent({ level: "info", component: "tui", action: "dashboard:start" }); const instance = render( diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index 79d73e5..47fc156 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -5,7 +5,7 @@ import { getArcDir, getConfigPath } from "./paths.js"; import { deepMerge } from "./shared-fs.js"; import type { ArcConfig, Profile } from "./types.js"; -const AUTH_TYPES = new Set(["oauth", "api-key", "bedrock", "vertex", "foundry"]); +const AUTH_TYPES = new Set(["oauth", "api-key", "bedrock", "vertex", "foundry", "openai-compat"]); export function defaultConfig(): ArcConfig { return { version: 1, activeProfile: "default", profiles: {} }; @@ -171,3 +171,23 @@ export function 
resolveProfile(config: ArcConfig, profileName: string): Profile return merged as unknown as Profile; } + +/** + * Resolve the effective instructions text for a profile. + * + * Priority: instructionsFile (read from disk) > inline instructions > undefined. A relative instructionsFile is resolved against profile.configDir; if the file cannot be read, the error is swallowed and the inline instructions (if any) are returned instead. + * Returns undefined if no instructions are configured. File contents are returned exactly as read (UTF-8, not trimmed). + */ +export function resolveInstructions(profile: Profile): string | undefined { + if (profile.instructionsFile) { + const resolved = path.isAbsolute(profile.instructionsFile) + ? profile.instructionsFile + : path.resolve(profile.configDir, profile.instructionsFile); + try { + return fs.readFileSync(resolved, "utf-8"); + } catch { + // File missing or unreadable — fall through to inline + } + } + return profile.instructions; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 65d919d..0b24272 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -37,6 +37,7 @@ export type { AuthType, AgentTool, Profile, + ProviderConfig, ArcSettings, ArcConfig, SharedManifest, diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 20833c3..83cc17d 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -1,4 +1,4 @@ -export type AuthType = "oauth" | "api-key" | "bedrock" | "vertex" | "foundry"; +export type AuthType = "oauth" | "api-key" | "bedrock" | "vertex" | "foundry" | "openai-compat"; export type AgentTool = string; @@ -20,6 +20,18 @@ export interface HookConfig { options?: Record; } +/** OpenAI-compatible provider configuration. */ +export interface ProviderConfig { + /** API base URL (e.g. https://openrouter.ai/api/v1, http://localhost:11434/v1) */ + baseUrl: string; + /** Model identifier (e.g. 
anthropic/claude-3.5-sonnet, llama3) */ + model?: string; + /** Env var name that holds the API key. Defaults to OPENAI_API_KEY. */ + apiKeyEnvVar?: string; + /** Provider display name for UI (e.g. 
"OpenRouter", "Ollama", "LM Studio") */ + displayName?: string; +} + export interface Profile { authType: AuthType; tool?: AgentTool; @@ -38,6 +50,12 @@ export interface Profile { enforcement?: EnforcementMode; /** Per-hook configuration overrides. Keys are hook names. */ hooks?: Record; + /** Custom instructions / system prompt injected into agent context. */ + instructions?: string; + /** Path to an instructions file (read at launch time). Takes precedence over inline instructions. */ + instructionsFile?: string; + /** OpenAI-compatible provider configuration for custom endpoints. */ + provider?: ProviderConfig; } export interface ArcSettings { diff --git a/tests/integration/adapter-registry.test.ts b/tests/integration/adapter-registry.test.ts index 66294ec..f969d58 100644 --- a/tests/integration/adapter-registry.test.ts +++ b/tests/integration/adapter-registry.test.ts @@ -10,15 +10,16 @@ import os from "node:os"; // They describe current behavior, not ideal behavior. describe("Adapter registry", () => { - it("listAdapters() returns exactly 5 adapters", () => { + it("listAdapters() returns exactly 6 adapters", () => { const adapters = listAdapters(); - expect(adapters).toHaveLength(5); + expect(adapters).toHaveLength(6); const ids = adapters.map((a) => a.id); expect(ids).toContain("claude"); expect(ids).toContain("gemini"); expect(ids).toContain("codex"); expect(ids).toContain("openclaw"); expect(ids).toContain("hermes"); + expect(ids).toContain("openai-compat"); }); it('getAdapter("claude") returns adapter with id "claude" and displayName "Claude Code"', () => { @@ -39,6 +40,13 @@ describe("Adapter registry", () => { expect(adapter.displayName).toBe("Codex CLI"); }); + it('getAdapter("openai-compat") returns adapter with id "openai-compat"', () => { + const adapter = getAdapter("openai-compat"); + expect(adapter.id).toBe("openai-compat"); + expect(adapter.displayName).toBe("OpenAI Compatible"); + expect(adapter.capabilities.processWrap).toBe(true); + }); + 
it("getAdapter() with unknown tool returns a fallback adapter (not undefined/null)", () => { const adapter = getAdapter("unknown-tool"); expect(adapter).toBeDefined(); diff --git a/tests/unit/resolve-profile.test.ts b/tests/unit/resolve-profile.test.ts index 182b24a..991e9ef 100644 --- a/tests/unit/resolve-profile.test.ts +++ b/tests/unit/resolve-profile.test.ts @@ -1,5 +1,8 @@ import { describe, it, expect } from "vitest"; -import { resolveProfile, validateConfig } from "@axiom-labs/arc-core"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { resolveProfile, validateConfig, resolveInstructions } from "@axiom-labs/arc-core"; import type { ArcConfig, Profile } from "@axiom-labs/arc-core"; // ─── Helpers ───────────────────────────────────────────────────────── @@ -310,3 +313,50 @@ describe("validateConfig with inherits", () => { expect(validateConfig(config)).toBe(true); }); }); + +// ─── resolveInstructions ──────────────────────────────────────────── + +describe("resolveInstructions", () => { + it("returns undefined when no instructions configured", () => { + const profile = baseProfile(); + expect(resolveInstructions(profile)).toBeUndefined(); + }); + + it("returns inline instructions text", () => { + const profile = baseProfile({ instructions: "You are a helpful assistant." 
}); + expect(resolveInstructions(profile)).toBe("You are a helpful assistant."); + }); + + it("reads instructionsFile from disk", () => { + const tmpFile = path.join(os.tmpdir(), `arc-test-instructions-${Date.now()}.md`); + fs.writeFileSync(tmpFile, "Instructions from file.", "utf-8"); + try { + const profile = baseProfile({ instructionsFile: tmpFile }); + expect(resolveInstructions(profile)).toBe("Instructions from file."); + } finally { + fs.unlinkSync(tmpFile); + } + }); + + it("instructionsFile takes precedence over inline instructions", () => { + const tmpFile = path.join(os.tmpdir(), `arc-test-instructions-${Date.now()}.md`); + fs.writeFileSync(tmpFile, "File wins.", "utf-8"); + try { + const profile = baseProfile({ + instructions: "Inline text.", + instructionsFile: tmpFile, + }); + expect(resolveInstructions(profile)).toBe("File wins."); + } finally { + fs.unlinkSync(tmpFile); + } + }); + + it("falls back to inline if instructionsFile is missing", () => { + const profile = baseProfile({ + instructions: "Fallback text.", + instructionsFile: "/nonexistent/path/instructions.md", + }); + expect(resolveInstructions(profile)).toBe("Fallback text."); + }); +}); diff --git a/user-docs/guide/authentication.md b/user-docs/guide/authentication.md index eaa6fb9..9c86b2d 100644 --- a/user-docs/guide/authentication.md +++ b/user-docs/guide/authentication.md @@ -94,6 +94,36 @@ ARC sets `CLAUDE_CODE_USE_FOUNDRY=1`. Required variables: | `ANTHROPIC_FOUNDRY_BASE_URL` | Foundry endpoint base URL | | `ANTHROPIC_FOUNDRY_RESOURCE` | Foundry resource name | +## OpenAI-Compatible Providers + +Connect any OpenAI-compatible API (OpenRouter, Ollama, LM Studio, Together AI, Groq, MiniMax, DeepSeek, and more). 
+ +```bash +arc create openrouter --tool openai-compat --auth-type openai-compat +arc provider set openrouter --base-url https://openrouter.ai/api/v1 --model anthropic/claude-sonnet-4 +arc set-key openrouter +``` + +ARC injects the following variables into the agent tool's environment: + +| Variable | Description | +|----------|-------------| +| `OPENAI_BASE_URL` | API endpoint from provider config | +| `OPENAI_API_KEY` | API key (stored via `arc set-key`) | +| `OPENAI_MODEL` | Model identifier from provider config | + +Run `arc provider presets` to see all known presets with their default base URLs and models. + +### Local Models + +For Ollama, LM Studio, or other local inference servers, no API key is needed: + +```bash +arc create local --tool openai-compat --auth-type openai-compat +arc provider set local --base-url http://localhost:11434/v1 --model llama3 --display-name Ollama +arc launch local +``` + ## Encrypted Secrets ARC includes an encrypted secret store for arbitrary key-value secrets, using Argon2id KDF and AES-256-GCM per-entry encryption. 
diff --git a/user-docs/guide/getting-started.md b/user-docs/guide/getting-started.md index 5954286..9df3e0d 100644 --- a/user-docs/guide/getting-started.md +++ b/user-docs/guide/getting-started.md @@ -98,6 +98,43 @@ arc use gemini-work arc launch # launches Gemini CLI ``` +### Custom Provider (OpenAI-Compatible) + +Connect any OpenAI-compatible API — OpenRouter, Ollama, LM Studio, and more: + +```bash +# OpenRouter (cloud multi-provider gateway) +arc create openrouter --tool openai-compat --auth-type openai-compat +arc provider set openrouter --base-url https://openrouter.ai/api/v1 --model anthropic/claude-sonnet-4 +arc set-key openrouter +arc launch openrouter + +# Ollama (local models) +arc create local --tool openai-compat --auth-type openai-compat +arc provider set local --base-url http://localhost:11434/v1 --model llama3 --display-name Ollama +arc launch local +``` + +Run `arc provider presets` to see all known providers. + +### Agent Instructions + +Set custom system prompts per profile: + +```bash +# Inline instructions +arc instructions set work --from-file ./INSTRUCTIONS.md + +# Or point to a file (re-read on every launch) +arc instructions set work --file ./INSTRUCTIONS.md + +# View current instructions +arc instructions show work + +# Edit interactively +arc instructions edit work +``` + ## Verify Installation ```bash From 42c965fcf9042cae46bc3a272f2434c4724d6f72 Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Fri, 17 Apr 2026 20:58:55 -0400 Subject: [PATCH 02/17] feat: dashboard enhancements, backup/export, profile cloning, sidebar queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ship four concurrent workstreams developed via parallel agent worktrees. 
- **Dashboard enhancements** — launch history store at ~/.arc/history.json (recordLaunch/getRecentLaunches), live DashView right column polling recent launches + activity log every 4s, ToastProvider + ToastContainer with auto-dismiss for in-app notifications. - **Backup/export/import** — `arc backup create/restore/list` with a custom gzipped archive format (ARCBAK01, no new deps), `arc profile export` and `arc profile import-file` for single-profile JSON transport with inlined instructions. Credentials excluded by default; path traversal validated on restore. - **Profile cloning** — `cloneProfile()` core fn (deep copy + configDir recursive copy), `arc profile clone [--no-copy-dir]` CLI, Shift+C inline clone keybind in ProfilesView. - **Interactive sidebar queue** — combined nav+profile selection in Sidebar with ↑/↓ navigation and Enter-to-launch on profile rows; owner of all sidebar input moved to Dashboard. Also restores ProviderConfig export + resolveInstructions fn + openai-compat auth type that were lost via older-base worktrees. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/cli/src/cli.ts | 64 ++++ packages/cli/src/commands/backup.ts | 360 ++++++++++++++++++++ packages/cli/src/commands/export.ts | 213 ++++++++++++ packages/cli/src/commands/launch.ts | 49 ++- packages/cli/src/commands/profile.ts | 41 ++- packages/cli/src/tui/Dashboard.tsx | 78 ++++- packages/cli/src/tui/components/Sidebar.tsx | 100 +++--- packages/cli/src/tui/components/Toast.tsx | 70 ++++ packages/cli/src/tui/useToast.ts | 94 +++++ packages/cli/src/tui/views/DashView.tsx | 125 +++++-- packages/cli/src/tui/views/ProfilesView.tsx | 80 ++++- packages/core/src/config.ts | 71 +++- packages/core/src/history.ts | 86 +++++ packages/core/src/index.ts | 1 + 14 files changed, 1343 insertions(+), 89 deletions(-) create mode 100644 packages/cli/src/commands/backup.ts create mode 100644 packages/cli/src/commands/export.ts create mode 100644 packages/cli/src/tui/components/Toast.tsx create mode 100644 packages/cli/src/tui/useToast.ts create mode 100644 packages/core/src/history.ts diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index eeccf60..256a62c 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -408,6 +408,15 @@ Examples: await mod.handleDelete(name, opts); }); + profile + .command("clone ") + .description("Clone an existing profile (copies config directory by default)") + .option("--no-copy-dir", "Clone the profile record only, skipping the config directory copy") + .action(async (src: string, dst: string, opts: { copyDir?: boolean }) => { + const mod = await import("./commands/profile.js"); + await mod.handleClone(src, dst, opts); + }); + profile .command("import") .description("Import existing agent tool config into a profile") @@ -420,6 +429,25 @@ Examples: await mod.handleImport(opts); }); + profile + .command("export ") + .description("Export a profile to a portable JSON file (inlines instructions)") + .option("--out ", "Output path (default: ./.arc-profile.json)") + .action(async 
(name: string, opts: { out?: string }) => { + const mod = await import("./commands/export.js"); + await mod.handleProfileExport(name, opts); + }); + + profile + .command("import-file ") + .description("Import a profile from a JSON file produced by `arc profile export`") + .option("--as ", "Rename the profile on import") + .option("--force", "Overwrite an existing profile with the same name") + .action(async (file: string, opts: { as?: string; force?: boolean }) => { + const mod = await import("./commands/export.js"); + await mod.handleProfileImport(file, opts); + }); + profile .command("set-flags ") .description("Set persistent launch flags for a profile (prepended on every launch)") @@ -856,6 +884,42 @@ Examples: } }); + // === Backup / Restore === + + const backup = program + .command("backup") + .description("Back up, restore, and list archives of the full ~/.arc/ state"); + + backup + .command("create") + .description("Create a gzipped archive of ~/.arc/ (excludes credentials/ by default)") + .option("--out ", "Output path (default: ~/.arc/backups/arc-backup-.tar.gz)") + .option("--exclude-credentials", "Exclude ~/.arc/credentials/ (default: on)", true) + .option("--include-credentials", "Include ~/.arc/credentials/ in the archive") + .action(async (opts: { out?: string; excludeCredentials?: boolean; includeCredentials?: boolean }) => { + const mod = await import("./commands/backup.js"); + const excludeCredentials = opts.includeCredentials ? false : opts.excludeCredentials ?? 
true; + await mod.handleBackupCreate({ out: opts.out, excludeCredentials }); + }); + + backup + .command("restore ") + .description("Restore a backup archive into ~/.arc/ (destructive)") + .option("--force", "Overwrite an existing ~/.arc/config.json") + .action(async (file: string, opts: { force?: boolean }) => { + const mod = await import("./commands/backup.js"); + await mod.handleBackupRestore(file, opts); + }); + + backup + .command("list") + .alias("ls") + .description("List archives in ~/.arc/backups/") + .action(async () => { + const mod = await import("./commands/backup.js"); + await mod.handleBackupList(); + }); + // === Advanced Commands === program diff --git a/packages/cli/src/commands/backup.ts b/packages/cli/src/commands/backup.ts new file mode 100644 index 0000000..bedcd79 --- /dev/null +++ b/packages/cli/src/commands/backup.ts @@ -0,0 +1,360 @@ +import fs from "node:fs"; +import path from "node:path"; +import zlib from "node:zlib"; +import { getArcDir, getConfigPath } from "../paths.js"; +import { success, error, info, warn } from "../display.js"; + +/** + * Custom archive format (gzipped): + * + * MAGIC = "ARCBAK01" (8 bytes, fixed) + * repeat per entry: + * pathLen (4 bytes, big-endian uint32) + * path (pathLen bytes, UTF-8, POSIX-style relative path) + * sizeLen (8 bytes, big-endian uint64; actually stored as BigInt) + * contents (sizeLen bytes, raw file bytes) + * + * Plain files only. Directories are implicit (recreated from paths during restore). + * Symlinks are skipped (reported as a warning). + * + * The whole blob is gzipped before being written to disk. 
/** Inputs for {@link walkFiles}. */
interface WalkOptions {
  /** Directory at which the walk starts. */
  root: string;
  /** Resolved absolute paths to leave out of the walk. */
  skipDirs: Set<string>;
}

/**
 * Collect the path of every regular file underneath `opts.root`.
 *
 * Entries are visited in the order `readdirSync` reports them, depth first.
 * An entry is left out when:
 *   - its resolved path is listed in `opts.skipDirs`,
 *   - it is a symbolic link (so the walk cannot leave the root), or
 *   - it is neither a directory nor a regular file (socket, fifo, device).
 * A directory that cannot be listed contributes no files instead of failing
 * the walk.
 *
 * @param opts - Root directory and the set of resolved paths to skip.
 * @returns File paths built by joining each entry name onto its directory.
 */
function walkFiles(opts: WalkOptions): string[] {
  const collect = (dir: string): string[] => {
    let children: fs.Dirent[];
    try {
      children = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unlistable directory: treat it as empty.
      return [];
    }

    return children.flatMap((child) => {
      const childPath = path.join(dir, child.name);

      const excluded = opts.skipDirs.has(path.resolve(childPath));
      if (excluded || child.isSymbolicLink()) {
        return [];
      }
      if (child.isDirectory()) {
        return collect(childPath);
      }
      return child.isFile() ? [childPath] : [];
    });
  };

  return collect(opts.root);
}
/**
 * CLI handler for `arc backup create`: write a gzipped ARCBAK01 archive of the
 * ARC directory.
 *
 * The `backups/` directory is never archived, and neither is the output file
 * itself. `credentials/` is excluded unless `opts.excludeCredentials` is
 * explicitly `false`. Files that cannot be read are skipped with a warning.
 *
 * Exits the process with status 1 when the ARC directory does not exist or the
 * archive cannot be written.
 *
 * @param opts.out - Destination path; defaults to a timestamped file under
 *   `<arcDir>/backups/`. (The default name ends in `.tar.gz`, but the payload
 *   is the custom ARCBAK01 format, not a tar stream.)
 * @param opts.excludeCredentials - Pass `false` to include `credentials/`.
 */
export async function handleBackupCreate(opts: {
  out?: string;
  excludeCredentials?: boolean;
}): Promise<void> {
  const arcDir = getArcDir();

  if (!fs.existsSync(arcDir)) {
    error(`ARC directory not found at ${arcDir}. Nothing to back up.`);
    process.exit(1);
  }

  const backupsDir = path.join(arcDir, "backups");
  fs.mkdirSync(backupsDir, { recursive: true });

  const outPath =
    opts.out !== undefined
      ? path.resolve(opts.out)
      : path.join(backupsDir, `arc-backup-${isoTimestamp()}.tar.gz`);

  // Ensure output parent exists.
  fs.mkdirSync(path.dirname(outPath), { recursive: true });

  const skipDirs = new Set<string>();
  // Never recurse into the backups directory (don't archive our own output).
  skipDirs.add(path.resolve(backupsDir));
  // Same for a custom --out that lives inside the ARC dir: without this, an
  // archive left there by an earlier run would be packed into the new one.
  // walkFiles compares every entry's resolved path against this set, so a
  // file path works here as well as a directory path.
  skipDirs.add(outPath);
  // By default exclude credentials (hot-swap snapshots are sensitive).
  const excludeCreds = opts.excludeCredentials !== false;
  if (excludeCreds) {
    skipDirs.add(path.resolve(path.join(arcDir, "credentials")));
  }

  const files = walkFiles({ root: arcDir, skipDirs });

  if (files.length === 0) {
    warn("No files to archive (ARC directory is effectively empty).");
  }

  const entries: ArchiveEntry[] = [];
  for (const full of files) {
    let contents: Buffer;
    try {
      contents = fs.readFileSync(full);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      warn(`Skipping unreadable file "${full}": ${msg}`);
      continue;
    }
    entries.push({ relPath: toPosixRelative(arcDir, full), contents });
  }

  const raw = encodeArchive(entries);
  const gz = zlib.gzipSync(raw, { level: zlib.constants.Z_BEST_COMPRESSION });

  try {
    fs.writeFileSync(outPath, gz);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    error(`Failed to write archive to "${outPath}": ${msg}`);
    process.exit(1);
  }

  success(`Backup written: ${outPath}`);
  info(
    `Archived ${entries.length} file(s), ${formatSize(raw.length)} uncompressed, ${formatSize(gz.length)} compressed.`,
  );
  if (excludeCreds) {
    // The CLI registers --include-credentials, so point the user at it.
    info("Note: ~/.arc/credentials/ was excluded. Pass --include-credentials to include it.");
  }
}
/**
 * CLI handler for `arc backup list`: print the regular files found in
 * `<arcDir>/backups/`, newest first, with size and modification time.
 *
 * Prints a hint and returns when the backups directory does not exist or is
 * empty. Exits the process with status 1 when the directory cannot be listed.
 * A file that cannot be stat'ed (for example, removed while listing) is
 * skipped with a warning instead of aborting the whole listing.
 */
export async function handleBackupList(): Promise<void> {
  const backupsDir = path.join(getArcDir(), "backups");

  if (!fs.existsSync(backupsDir)) {
    info(`No backups directory yet (${backupsDir}).`);
    info("Create one with: arc backup create");
    return;
  }

  let entries: fs.Dirent[];
  try {
    entries = fs.readdirSync(backupsDir, { withFileTypes: true });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    error(`Failed to read ${backupsDir}: ${msg}`);
    process.exit(1);
  }

  const files: { name: string; full: string; size: number; mtime: Date }[] = [];
  for (const e of entries) {
    if (!e.isFile()) continue;
    const full = path.join(backupsDir, e.name);
    try {
      const stat = fs.statSync(full);
      files.push({ name: e.name, full, size: stat.size, mtime: stat.mtime });
    } catch (err) {
      // The file can disappear or become unreadable between readdir and stat.
      const msg = err instanceof Error ? err.message : String(err);
      warn(`Skipping "${e.name}": ${msg}`);
    }
  }
  files.sort((a, b) => b.mtime.getTime() - a.mtime.getTime());

  if (files.length === 0) {
    info(`No backup files found in ${backupsDir}.`);
    return;
  }

  console.log();
  for (const f of files) {
    const when = f.mtime.toISOString();
    const size = formatSize(f.size).padStart(10);
    console.log(`  ${size}  ${when}  ${f.name}`);
  }
  console.log();
  info(`${files.length} backup(s) in ${backupsDir}`);
}
/**
 * CLI handler for `arc profile export`: write one profile to a portable JSON
 * manifest.
 *
 * When the profile has an `instructionsFile`, its contents are inlined into
 * the manifest as `instructionsContent`. A relative `instructionsFile` is
 * resolved against the profile's `configDir`, the same rule
 * `resolveInstructions` applies at launch, so export and launch read the same
 * file regardless of the current working directory. If the file cannot be
 * read, a warning is printed and the export continues without inlined content.
 *
 * Exits the process with status 1 when the profile does not exist or the
 * output file cannot be written.
 *
 * @param name - Name of the profile to export.
 * @param opts.out - Output path; defaults to `./<slug>.arc-profile.json`
 *   where `<slug>` is `slugify(name)`.
 */
export async function handleProfileExport(
  name: string,
  opts: { out?: string },
): Promise<void> {
  const config = loadConfig();
  const profile = config.profiles[name];
  if (!profile) {
    error(`Profile "${name}" not found.`);
    process.exit(1);
  }

  // Deep-clone so we don't mutate the in-memory config.
  const profileCopy: Profile = JSON.parse(JSON.stringify(profile));

  let instructionsContent: string | undefined;
  if (profileCopy.instructionsFile) {
    let instructionsPath = profileCopy.instructionsFile;
    try {
      // Relative paths are relative to the profile's config dir, not the cwd.
      if (!path.isAbsolute(instructionsPath)) {
        instructionsPath = path.resolve(profileCopy.configDir, instructionsPath);
      }
      instructionsContent = fs.readFileSync(instructionsPath, "utf-8");
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      warn(`Could not inline instructions file "${instructionsPath}": ${msg}`);
    }
  }

  let arcVersion: string | undefined;
  try {
    const mod = await import("../version.js");
    arcVersion = mod.VERSION;
  } catch {
    // Optional — continue without.
  }

  const manifest: ProfileExportManifest = {
    manifest: MANIFEST_KIND,
    manifestVersion: MANIFEST_VERSION,
    exportedAt: new Date().toISOString(),
    arcVersion,
    name,
    profile: profileCopy,
    ...(instructionsContent !== undefined ? { instructionsContent } : {}),
  };

  const outPath =
    opts.out !== undefined
      ? path.resolve(opts.out)
      : path.resolve(`./${slugify(name)}.arc-profile.json`);

  try {
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, JSON.stringify(manifest, null, 2) + "\n", "utf-8");
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    error(`Failed to write "${outPath}": ${msg}`);
    process.exit(1);
  }

  success(`Exported profile "${name}" to ${outPath}`);
  if (instructionsContent !== undefined) {
    info(`Inlined instructions file (${instructionsContent.length} chars).`);
  }
}
This ARC build supports version ${MANIFEST_VERSION}.`, + ); + process.exit(1); + } + + const targetName = opts.as ?? manifest.name; + if (!/^[a-zA-Z0-9][a-zA-Z0-9_.-]*$/.test(targetName)) { + error(`Invalid profile name: "${targetName}".`); + process.exit(1); + } + + const config = loadConfig(); + const exists = Object.prototype.hasOwnProperty.call(config.profiles, targetName); + + if (exists && !opts.force && !opts.as) { + error(`Profile "${targetName}" already exists. Pass --force to overwrite or --as to rename.`); + process.exit(1); + } + if (exists && opts.as) { + // User explicitly renamed but collided with something else. + if (!opts.force) { + error(`Profile "${targetName}" (from --as) already exists. Pass --force to overwrite.`); + process.exit(1); + } + } + + // Clone, then optionally re-target configDir and instructionsFile to the new name. + const imported: Profile = JSON.parse(JSON.stringify(manifest.profile)); + + // If the profile had inline instructions content, write it into the new profile's dir. + if (manifest.instructionsContent !== undefined) { + const profileDir = getProfileDir(targetName); + fs.mkdirSync(profileDir, { recursive: true }); + const instructionsPath = path.join(profileDir, "instructions.md"); + try { + fs.writeFileSync(instructionsPath, manifest.instructionsContent, "utf-8"); + imported.instructionsFile = instructionsPath; + delete imported.instructions; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + warn(`Failed to write instructions to "${instructionsPath}": ${msg}`); + } + } else if (imported.instructionsFile && opts.as) { + // Renamed on import — the old instructionsFile path likely points at a + // foreign profile dir. Keep it as-is but warn. + warn( + `Profile references instructionsFile "${imported.instructionsFile}" — verify it exists on this machine.`, + ); + } + + // Reset configDir to a fresh per-profile dir (do not reuse the source machine's path). 
+ imported.configDir = getProfileDir(targetName); + fs.mkdirSync(imported.configDir, { recursive: true }); + + config.profiles[targetName] = imported; + if (!config.activeProfile) { + config.activeProfile = targetName; + } + saveConfig(config); + + success( + exists + ? `Overwrote profile "${targetName}" from ${resolved}` + : `Imported profile "${targetName}" from ${resolved}`, + ); + if (manifest.instructionsContent !== undefined) { + info(`Wrote instructions to ${imported.instructionsFile}`); + } +} diff --git a/packages/cli/src/commands/launch.ts b/packages/cli/src/commands/launch.ts index f43c6ed..1d83d9f 100644 --- a/packages/cli/src/commands/launch.ts +++ b/packages/cli/src/commands/launch.ts @@ -3,13 +3,14 @@ import fs from "node:fs"; import path from "node:path"; import { loadConfig } from "../config.js"; import { buildProfileEnv } from "../auth.js"; -import { resolveEffectiveProfile, resolveInstructions } from "@axiom-labs/arc-core"; +import { resolveEffectiveProfile } from "@axiom-labs/arc-core"; import { error, info, warn, cmd } from "../display.js"; import { logAction } from "../log.js"; import { getAdapter } from "../adapters/index.js"; import { waitForProcessExit } from "@axiom-labs/arc-core"; import { createDefaultHookBus } from "@axiom-labs/arc-core"; import { writeLogEvent, queryLogEvents } from "@axiom-labs/arc-core"; +import { recordLaunch } from "@axiom-labs/arc-core"; import { SessionStore, isResumeIntent } from "@axiom-labs/arc-core"; import { TelemetryProvider, JsonFileExporter, startSessionSpan } from "@axiom-labs/arc-core"; import { CircuitBreaker } from "@axiom-labs/arc-core"; @@ -211,19 +212,6 @@ export async function handleLaunch( const profileEnv = await buildProfileEnv(profile, profileName); - // ─── Resolve and inject agent instructions ───────────────────────── - const instructionsText = resolveInstructions(profile); - if (instructionsText) { - profileEnv["ARC_AGENT_INSTRUCTIONS"] = instructionsText; - writeLogEvent({ - level: 
"info", - component: "launch", - action: "instructions:resolved", - message: `Agent instructions loaded (${instructionsText.length} chars)`, - data: { profile: profileName, source: profile.instructionsFile ? "file" : "inline" }, - }); - } - if (!findBinary(tool)) { error(`Binary "${tool}" not found on PATH.`); warn(getInstallHint(tool)); @@ -384,6 +372,13 @@ export async function handleLaunch( } } + recordLaunch({ + profile: profileName, + tool, + timestamp: new Date().toISOString(), + outcome: "started", + }); + let agentProcess: AgentProcess | null = null; try { agentProcess = await adapter.launch(profile, { @@ -399,6 +394,12 @@ export async function handleLaunch( agentProcess = null; } else { // Real error from a real adapter + recordLaunch({ + profile: profileName, + tool, + timestamp: new Date().toISOString(), + outcome: "failed", + }); error(`Failed to launch ${tool}: ${msg}`); process.exit(1); } @@ -502,6 +503,13 @@ export async function handleLaunch( // Block until the child process exits await waitForProcessExit(agentProcess.pid); + recordLaunch({ + profile: profileName, + tool, + timestamp: new Date().toISOString(), + outcome: "exited", + exitCode: 0, + }); await finalizeCoreModules(0); process.exit(0); } @@ -526,12 +534,25 @@ export async function handleLaunch( }); if (result.error) { + recordLaunch({ + profile: profileName, + tool, + timestamp: new Date().toISOString(), + outcome: "failed", + }); await finalizeCoreModules(1); error(`Failed to launch ${tool}: ${result.error.message}`); process.exit(1); } const exitCode = result.status ?? 0; + recordLaunch({ + profile: profileName, + tool, + timestamp: new Date().toISOString(), + outcome: exitCode === 0 ? 
/**
 * CLI handler for `arc profile clone`: duplicate profile `src` under the new
 * name `dst` and persist the updated config.
 *
 * Exits the process with status 1 when `src` does not exist, when `dst` fails
 * `validateName`, or when `cloneProfile` / `saveConfig` throws.
 *
 * @param src - Name of the existing profile to copy from.
 * @param dst - Name for the new profile.
 * @param opts.copyDir - Set to `false` (CLI `--no-copy-dir`) to ask
 *   `cloneProfile` not to copy the config directory; any other value asks
 *   for the copy.
 */
export async function handleClone(
  src: string,
  dst: string,
  opts?: { copyDir?: boolean }
): Promise<void> {
  const config = loadConfig();
  const source = config.profiles[src];

  if (!source) {
    error(`Source profile "${src}" not found.`);
    process.exit(1);
  }

  // Validate dst with the validator shared with the TUI.
  const existingNames = Object.keys(config.profiles);
  const nameError = validateName(dst, existingNames);
  if (nameError) {
    error(nameError);
    process.exit(1);
  }

  const copyConfigDir = opts?.copyDir !== false;
  // Checked before cloning so the note below describes the pre-clone state.
  const sourceDirMissing = !(source.configDir && fs.existsSync(source.configDir));

  try {
    saveConfig(cloneProfile(config, src, dst, { copyConfigDir }));
  } catch (err) {
    error(err instanceof Error ? err.message : String(err));
    process.exit(1);
  }

  success(`Profile "${dst}" cloned from "${src}".`);
  if (copyConfigDir && sourceDirMissing) {
    info(`Source config directory was missing — cloned profile record only.`);
  }

  // Re-read the saved config to report the directory that was persisted.
  const clonedDir = loadConfig().profiles[dst]?.configDir ?? "(unknown)";
  detail(`Config: ${clonedDir}`);
}
} = useToast(); const { profiles, loading, config, reload } = useProfiles(); const { exit } = useApp(); const { toggleTheme, theme } = useTheme(); @@ -46,6 +64,24 @@ export function Dashboard() { const [workspaceTyping, setWorkspaceTyping] = useState(false); const [activity, setActivity] = useState([]); const [infoProfile, setInfoProfile] = useState(null); + // 0-indexed position in the combined sidebar list [nav..., visible profiles...]. + const [sidebarSelection, setSidebarSelection] = useState(0); + + const selectableCount = sidebarSelectableCount(profiles); + useEffect(() => { + if (selectableCount === 0) return; + if (sidebarSelection >= selectableCount) { + setSidebarSelection(selectableCount - 1); + } + }, [selectableCount, sidebarSelection]); + + useEffect(() => { + const navIdx = NAV_ITEMS.findIndex((item) => item.view === activeView); + if (navIdx >= 0 && sidebarSelection < NAV_ITEMS.length) { + setSidebarSelection(navIdx); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [activeView]); const paletteItems: PaletteItem[] = [ { id: "dash", label: "Dashboard", description: "Overview and status" }, @@ -183,6 +219,39 @@ export function Dashboard() { handleTriggerUpdate(); return; } + + // --- Sidebar navigation (combined nav + profile queue) --- + const profileCount = sidebarProfileCount(profiles); + const total = NAV_ITEMS.length + profileCount; + + if (key.upArrow) { + if (total === 0) return; + setSidebarSelection((prev) => Math.max(0, prev - 1)); + return; + } + + if (key.downArrow) { + if (total === 0) return; + setSidebarSelection((prev) => Math.min(total - 1, prev + 1)); + return; + } + + if (key.return) { + if (sidebarSelection < NAV_ITEMS.length) { + const item = NAV_ITEMS[sidebarSelection]; + if (item) setActiveView(item.view); + return; + } + const profileIdx = sidebarSelection - SIDEBAR_PROFILES_START; + const profile = profiles.slice(0, profileCount)[profileIdx]; + if (profile) { + markLaunchPending(); + setTimeout(() => { + 
handleLaunch(profile.name, [], { beforeSpawn: exit }); + }, 0); + } + return; + } }, { isActive: !showOnboarding }); const tooSmall = width < MIN_WIDTH || height < MIN_HEIGHT; @@ -371,13 +440,12 @@ export function Dashboard() { sidebar={ } - content={content} + content={<>{content}} overlay={overlayNode} overlayOpen={overlay !== null} overlayName={overlay} diff --git a/packages/cli/src/tui/components/Sidebar.tsx b/packages/cli/src/tui/components/Sidebar.tsx index cf3015f..21b8216 100644 --- a/packages/cli/src/tui/components/Sidebar.tsx +++ b/packages/cli/src/tui/components/Sidebar.tsx @@ -1,10 +1,10 @@ -import { Box, Text, useInput } from "ink"; +import { Box, Text } from "ink"; import { useTheme } from "../theme.js"; import type { ProfileEntry } from "../useProfiles.js"; export type ViewName = "dash" | "workspace" | "profiles" | "about" | "doctor" | "settings" | "tasks" | "memory" | "skills" | "sync" | "telemetry" | "agents"; -const NAV_ITEMS: { view: ViewName; label: string }[] = [ +export const NAV_ITEMS: { view: ViewName; label: string }[] = [ { view: "dash", label: "Dash" }, { view: "workspace", label: "Work" }, { view: "profiles", label: "Profiles" }, @@ -19,45 +19,51 @@ const NAV_ITEMS: { view: ViewName; label: string }[] = [ { view: "agents", label: "Agents" }, ]; +/** Max number of profile rows rendered in the sidebar queue. */ +export const SIDEBAR_PROFILE_LIMIT = 5; + +/** + * Returns the count of selectable profile rows in the sidebar (capped). + */ +export function sidebarProfileCount(profiles: ProfileEntry[]): number { + return Math.min(profiles.length, SIDEBAR_PROFILE_LIMIT); +} + +/** + * Total selectable rows in the sidebar (nav items + visible profile rows). + */ +export function sidebarSelectableCount(profiles: ProfileEntry[]): number { + return NAV_ITEMS.length + sidebarProfileCount(profiles); +} + +/** + * Index at which profile rows begin in the combined selection list. 
+ */ +export const SIDEBAR_PROFILES_START = NAV_ITEMS.length; + interface SidebarProps { activeView: ViewName; - onViewChange: (view: ViewName) => void; profiles: ProfileEntry[]; focusedPane: "sidebar" | "content"; - inputEnabled: boolean; + /** + * 0-indexed position in the combined [nav..., profiles...] list. + * Drives the visual highlight when sidebar is focused. + */ + selectedIndex: number; } export function Sidebar({ activeView, - onViewChange, profiles, focusedPane, - inputEnabled, + selectedIndex, }: SidebarProps) { const { theme } = useTheme(); const { colors } = theme; const isFocused = focusedPane === "sidebar"; - const navIndex = Math.max(0, NAV_ITEMS.findIndex((item) => item.view === activeView)); const activeProfile = profiles.find((profile) => profile.active); const readyCount = profiles.filter((profile) => profile.credential?.authenticated).length; - - useInput( - (_, key) => { - if (!isFocused || !inputEnabled) return; - - if (key.upArrow) { - const previous = NAV_ITEMS[Math.max(0, navIndex - 1)]; - if (previous) onViewChange(previous.view); - return; - } - - if (key.downArrow) { - const next = NAV_ITEMS[Math.min(NAV_ITEMS.length - 1, navIndex + 1)]; - if (next) onViewChange(next.view); - } - }, - { isActive: isFocused && inputEnabled } - ); + const visibleProfiles = profiles.slice(0, SIDEBAR_PROFILE_LIMIT); return ( @@ -77,7 +83,7 @@ export function Sidebar({ {NAV_ITEMS.map((item, index) => { const isActive = item.view === activeView; - const isHighlighted = isFocused && index === navIndex; + const isHighlighted = isFocused && index === selectedIndex; let textColor = colors.dimmed; if (isActive) textColor = colors.primary; @@ -115,22 +121,38 @@ export function Sidebar({ {/* Queue */} {"─".repeat(14)} - {profiles.length === 0 ? ( + {visibleProfiles.length === 0 ? ( no profiles ) : ( - profiles.slice(0, 5).map((profile) => ( - - - {profile.active ? 
"● " : "○ "} - - { + const combinedIndex = SIDEBAR_PROFILES_START + index; + const isHighlighted = isFocused && combinedIndex === selectedIndex; + return ( + - {profile.name} - - - )) + + {isHighlighted ? "›" : " "} + + + {profile.active ? "● " : " ○ "} + + + {profile.name} + + + ); + }) )} diff --git a/packages/cli/src/tui/components/Toast.tsx b/packages/cli/src/tui/components/Toast.tsx new file mode 100644 index 0000000..df98122 --- /dev/null +++ b/packages/cli/src/tui/components/Toast.tsx @@ -0,0 +1,70 @@ +import { Box, Text } from "ink"; +import { useTheme } from "../theme.js"; +import type { ToastItem, ToastKind } from "../useToast.js"; + +function colorForKind( + kind: ToastKind, + colors: ReturnType["theme"]["colors"] +): string { + switch (kind) { + case "success": + return colors.success; + case "error": + return colors.error; + case "warn": + return colors.warning; + case "info": + default: + return colors.primary; + } +} + +function iconForKind(kind: ToastKind): string { + switch (kind) { + case "success": + return "\u2714"; + case "error": + return "\u2716"; + case "warn": + return "\u26A0"; + case "info": + default: + return "\u2139"; + } +} + +interface ToastProps { + toast: ToastItem; +} + +export function Toast({ toast }: ToastProps) { + const { theme } = useTheme(); + const color = colorForKind(toast.kind, theme.colors); + + return ( + + {iconForKind(toast.kind)} + {toast.message} + + ); +} + +interface ToastContainerProps { + toasts: ToastItem[]; +} + +export function ToastContainer({ toasts }: ToastContainerProps) { + if (toasts.length === 0) return null; + return ( + + {toasts.map((toast) => ( + + ))} + + ); +} diff --git a/packages/cli/src/tui/useToast.ts b/packages/cli/src/tui/useToast.ts new file mode 100644 index 0000000..6d46707 --- /dev/null +++ b/packages/cli/src/tui/useToast.ts @@ -0,0 +1,94 @@ +import { + createContext, + useCallback, + useContext, + useEffect, + useRef, + useState, + createElement, + type ReactNode, +} from "react"; + 
+export type ToastKind = "info" | "success" | "error" | "warn"; + +export interface ToastItem { + id: string; + message: string; + kind: ToastKind; + createdAt: number; +} + +interface ToastContextValue { + toasts: ToastItem[]; + showToast: (message: string, kind?: ToastKind) => void; + dismissToast: (id: string) => void; +} + +const ToastContext = createContext(undefined); + +const TOAST_DURATION_MS = 2500; + +let toastIdCounter = 0; +function nextToastId(): string { + toastIdCounter += 1; + return `toast-${Date.now()}-${toastIdCounter}`; +} + +interface ToastProviderProps { + children: ReactNode; +} + +export function ToastProvider({ children }: ToastProviderProps) { + const [toasts, setToasts] = useState([]); + const timersRef = useRef(new Map>()); + + const dismissToast = useCallback((id: string) => { + const timer = timersRef.current.get(id); + if (timer) { + clearTimeout(timer); + timersRef.current.delete(id); + } + setToasts((current) => current.filter((t) => t.id !== id)); + }, []); + + const showToast = useCallback( + (message: string, kind: ToastKind = "info") => { + const id = nextToastId(); + const toast: ToastItem = { + id, + message, + kind, + createdAt: Date.now(), + }; + setToasts((current) => [...current, toast]); + const timer = setTimeout(() => { + timersRef.current.delete(id); + setToasts((current) => current.filter((t) => t.id !== id)); + }, TOAST_DURATION_MS); + timersRef.current.set(id, timer); + }, + [] + ); + + useEffect(() => { + const timers = timersRef.current; + return () => { + for (const timer of timers.values()) { + clearTimeout(timer); + } + timers.clear(); + }; + }, []); + + const value: ToastContextValue = { toasts, showToast, dismissToast }; + + return createElement(ToastContext.Provider, { value }, children); +} + +export function useToast(): ToastContextValue { + const ctx = useContext(ToastContext); + if (!ctx) { + throw new Error("useToast must be used within a ToastProvider"); + } + return ctx; +} diff --git 
a/packages/cli/src/tui/views/DashView.tsx b/packages/cli/src/tui/views/DashView.tsx index 20777ea..387f8f0 100644 --- a/packages/cli/src/tui/views/DashView.tsx +++ b/packages/cli/src/tui/views/DashView.tsx @@ -7,6 +7,12 @@ import { VERSION } from "../../version.js"; import { ImportHint } from "../components/ImportHint.js"; import { detectToolConfigs, type DetectedTool } from "../../detect.js"; import { checkForUpdate, type UpdateInfo } from "../../update.js"; +import { + getRecentLaunches, + queryLogEvents, + type LaunchHistoryEntry, + type LogEvent, +} from "@axiom-labs/arc-core"; import type { ProfileEntry } from "../useProfiles.js"; interface Props { @@ -146,39 +152,108 @@ function LeftColumn({ profiles, colors, isDark }: { ); } -/* ── Right Column: pipeline + status ───────────────────────────────── */ +/* ── Right Column: recent launches + activity ──────────────────────── */ + +function formatTime(iso: string): string { + try { + const d = new Date(iso); + if (Number.isNaN(d.getTime())) return "--:--"; + const hh = String(d.getHours()).padStart(2, "0"); + const mm = String(d.getMinutes()).padStart(2, "0"); + return `${hh}:${mm}`; + } catch { + return "--:--"; + } +} + +function outcomeColor(outcome: string, colors: ThemeColors): string { + switch (outcome) { + case "started": + return colors.primary; + case "exited": + return colors.success; + case "failed": + return colors.error; + default: + return colors.dimmed; + } +} function RightColumn({ colors }: { colors: ThemeColors }) { - // TODO: Wire to real hook runner state + data stores - // For now, show idle/empty state — don't fake activity + const [launches, setLaunches] = useState([]); + const [activity, setActivity] = useState([]); + + useEffect(() => { + const refresh = () => { + try { + setLaunches(getRecentLaunches(5)); + } catch { + setLaunches([]); + } + try { + setActivity(queryLogEvents({ limit: 5 })); + } catch { + setActivity([]); + } + }; + refresh(); + const timer = setInterval(refresh, 4000); 
+ return () => clearInterval(timer); + }, []); return ( - - - {"░".repeat(5)} - {"░".repeat(5)} - {"░".repeat(5)} - {"░".repeat(5)} - - - PRE - VAL - POST - DONE - - - - + + + {launches.length === 0 ? ( + No launches yet. + ) : ( + launches.map((entry, idx) => ( + + + {formatTime(entry.timestamp)} + + + {entry.profile} + + + + {entry.tool} + + + {entry.outcome} + + + )) + )} - + - - - - - + {activity.length === 0 ? ( + No activity yet. + ) : ( + activity + .slice() + .reverse() + .map((event, idx) => { + const detail = + event.message ?? event.detail ?? event.action ?? ""; + return ( + + + + {formatTime(event.timestamp)} + + + + {event.action} + + {detail} + + ); + }) + )} ); diff --git a/packages/cli/src/tui/views/ProfilesView.tsx b/packages/cli/src/tui/views/ProfilesView.tsx index 2e76bea..fe4147f 100644 --- a/packages/cli/src/tui/views/ProfilesView.tsx +++ b/packages/cli/src/tui/views/ProfilesView.tsx @@ -4,12 +4,12 @@ import { Box, Text, useInput } from "ink"; import { Spinner } from "@inkjs/ui"; import { useTheme } from "../theme.js"; import { ProfileList } from "../components/ProfileList.js"; -import { saveConfig, loadConfig } from "../../config.js"; +import { saveConfig, loadConfig, cloneProfile } from "../../config.js"; import { syncSharedToProfile, unsyncSharedFromProfile, getSharedManifest, pullProfileToShared } from "../../shared.js"; -import { RENDER_DEFER_MS } from "../createProfile.js"; +import { RENDER_DEFER_MS, validateName } from "../createProfile.js"; import type { ProfileEntry } from "../useProfiles.js"; -type Action = "idle" | "launching" | "confirm-delete" | "edit-flags"; +type Action = "idle" | "launching" | "confirm-delete" | "edit-flags" | "clone"; interface Props { profiles: ProfileEntry[]; @@ -40,6 +40,8 @@ export function ProfilesView({ const [message, setMessage] = useState(null); const [deleteTarget, setDeleteTarget] = useState(null); const [flagsInput, setFlagsInput] = useState(""); + const [cloneSource, setCloneSource] = 
useState(null); + const [cloneInput, setCloneInput] = useState(""); const showMessage = useCallback((msg: string) => { setMessage(msg); @@ -98,6 +100,54 @@ export function ProfilesView({ return; } + // ── Clone mode ── + if (action === "clone") { + if (key.escape) { + setAction("idle"); + setCloneInput(""); + setCloneSource(null); + showMessage("Clone cancelled"); + return; + } + if (key.return) { + const src = cloneSource; + const dst = cloneInput.trim(); + if (!src) { + setAction("idle"); + setCloneInput(""); + setCloneSource(null); + return; + } + try { + const config = loadConfig(); + const nameError = validateName(dst, Object.keys(config.profiles)); + if (nameError) { + showMessage(nameError); + return; + } + const updated = cloneProfile(config, src, dst, { copyConfigDir: true }); + saveConfig(updated); + showMessage(`Cloned ${src} → ${dst}`); + reload(); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + showMessage(`Clone failed: ${msg}`); + } + setAction("idle"); + setCloneInput(""); + setCloneSource(null); + return; + } + if (key.backspace || key.delete) { + setCloneInput((v) => v.slice(0, -1)); + return; + } + if (!key.ctrl && !key.meta && input.length === 1) { + setCloneInput((v) => v + input); + } + return; + } + // ── Edit flags mode ── if (action === "edit-flags") { if (key.escape) { @@ -190,6 +240,14 @@ export function ProfilesView({ return; } + // [C] clone profile (uppercase — lowercase `c` still means create) + if (input === "C") { + setCloneSource(selected.name); + setCloneInput(""); + setAction("clone"); + return; + } + if (input === "c") { onCreateProfile?.(); return; @@ -353,6 +411,20 @@ export function ProfilesView({ )} + {/* Clone mode */} + {action === "clone" && ( + + + Clone {cloneSource} as: + {cloneInput} + {"\u258C"} + + + enter save esc cancel + + + )} + {/* Edit flags mode */} {action === "edit-flags" && ( @@ -389,7 +461,7 @@ export function ProfilesView({ {!loading && action === "idle" && ( - 
{"\u21B5"} launch s switch i info d delete h sync shift+h push shift+s source f flags c create + {"\u21B5"} launch s switch i info d delete h sync shift+h push shift+s source f flags c create shift+c clone )} diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index 47fc156..e7e6ab8 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -1,7 +1,7 @@ import crypto from "node:crypto"; import fs from "node:fs"; import path from "node:path"; -import { getArcDir, getConfigPath } from "./paths.js"; +import { getArcDir, getConfigPath, getProfileDir } from "./paths.js"; import { deepMerge } from "./shared-fs.js"; import type { ArcConfig, Profile } from "./types.js"; @@ -191,3 +191,72 @@ export function resolveInstructions(profile: Profile): string | undefined { } return profile.instructions; } + +export interface CloneProfileOptions { + /** When true (default), recursively copy the source configDir to the new profile directory. */ + copyConfigDir?: boolean; +} + +/** + * Deep-copy a profile record from `src` to `dst`, resetting `createdAt` and + * (optionally) copying the on-disk configDir to the new profile's location. + * + * Mutates and returns `config`. Callers are responsible for persisting via saveConfig(). + * + * Throws if `src` does not exist or `dst` already exists. If the source + * configDir has been deleted, the profile record is still cloned but the + * directory copy is silently skipped (warning is left to the caller's UI). + */ +export function cloneProfile( + config: ArcConfig, + src: string, + dst: string, + opts?: CloneProfileOptions +): ArcConfig { + if (!config.profiles[src]) { + throw new Error(`Source profile '${src}' not found`); + } + if (src === dst) { + throw new Error(`Destination name must differ from source ('${src}')`); + } + if (config.profiles[dst]) { + throw new Error(`Profile '${dst}' already exists`); + } + + const source = config.profiles[src]; + + // Deep-copy the profile record. 
structuredClone is available in Node 17+. + const cloned: Profile = + typeof structuredClone === "function" + ? (structuredClone(source) as Profile) + : (JSON.parse(JSON.stringify(source)) as Profile); + + cloned.createdAt = new Date().toISOString(); + + const copyConfigDir = opts?.copyConfigDir !== false; + if (copyConfigDir) { + const newDir = getProfileDir(dst); + const srcDir = source.configDir; + const srcExists = srcDir && fs.existsSync(srcDir); + + if (srcExists) { + fs.mkdirSync(newDir, { recursive: true }); + fs.cpSync(srcDir, newDir, { + recursive: true, + force: true, + dereference: true, + filter: (s: string) => { + const base = path.basename(s); + return base !== "node_modules" && base !== ".bin"; + }, + }); + } else { + // Source dir missing — still create an empty profile dir so launches don't explode. + fs.mkdirSync(newDir, { recursive: true }); + } + cloned.configDir = newDir; + } + + config.profiles[dst] = cloned; + return config; +} diff --git a/packages/core/src/history.ts b/packages/core/src/history.ts new file mode 100644 index 0000000..2fed270 --- /dev/null +++ b/packages/core/src/history.ts @@ -0,0 +1,86 @@ +import crypto from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; +import { getArcDir } from "./paths.js"; + +export type LaunchOutcome = "started" | "exited" | "failed"; + +export interface LaunchHistoryEntry { + profile: string; + tool: string; + timestamp: string; + outcome: LaunchOutcome; + exitCode?: number; +} + +const MAX_HISTORY_ENTRIES = 200; + +export function getHistoryPath(): string { + return path.join(getArcDir(), "history.json"); +} + +function readHistory(): LaunchHistoryEntry[] { + try { + const historyPath = getHistoryPath(); + if (!fs.existsSync(historyPath)) { + return []; + } + const raw = fs.readFileSync(historyPath, "utf-8"); + const parsed = JSON.parse(raw) as unknown; + if (!Array.isArray(parsed)) { + return []; + } + return parsed.filter((entry): entry is LaunchHistoryEntry => { + if 
(typeof entry !== "object" || entry === null) return false; + const record = entry as Record; + return ( + typeof record["profile"] === "string" && + typeof record["tool"] === "string" && + typeof record["timestamp"] === "string" && + typeof record["outcome"] === "string" + ); + }); + } catch { + return []; + } +} + +function writeHistory(entries: LaunchHistoryEntry[]): void { + try { + const dir = getArcDir(); + fs.mkdirSync(dir, { recursive: true }); + const historyPath = getHistoryPath(); + const tempPath = path.join( + dir, + `history.tmp.${crypto.randomBytes(4).toString("hex")}` + ); + fs.writeFileSync(tempPath, JSON.stringify(entries, null, 2) + "\n", "utf-8"); + if (process.platform !== "win32") { + fs.chmodSync(tempPath, 0o600); + } + fs.renameSync(tempPath, historyPath); + } catch { + // History writes must never crash ARC. + } +} + +export function recordLaunch(entry: LaunchHistoryEntry): void { + try { + const entries = readHistory(); + entries.push(entry); + const trimmed = + entries.length > MAX_HISTORY_ENTRIES + ? entries.slice(entries.length - MAX_HISTORY_ENTRIES) + : entries; + writeHistory(trimmed); + } catch { + // Non-fatal. 
+ } +} + +export function getRecentLaunches(limit = 10): LaunchHistoryEntry[] { + const entries = readHistory(); + const reversed = [...entries].reverse(); + if (limit <= 0) return reversed; + return reversed.slice(0, limit); +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 0b24272..dadb1f8 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -5,6 +5,7 @@ export * from "./workspace.js"; export * from "./import-utils.js"; export * from "./keyring.js"; export * from "./secrets/index.js"; +export * from "./history.js"; export * from "./lifecycle.js"; export * from "./logging.js"; export * from "./paths.js"; From 8e9f9654431879c90a29888847945eb41033ffb2 Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Fri, 17 Apr 2026 21:08:37 -0400 Subject: [PATCH 03/17] docs: Mark shipped items in FEATURES.md Check off profile cloning, launch history on Dash, toast notifications, interactive sidebar queue, and backup/export/import. Remove the same items from the Remaining UX Backlog section. Co-Authored-By: Claude Opus 4.7 (1M context) --- FEATURES.md | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/FEATURES.md b/FEATURES.md index 4a1869a..ed51e4d 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -30,18 +30,18 @@ Tracking file for planned features, enhancements, and ideas. 
Checked items are s - [x] **Agent instructions** — `instructions` / `instructionsFile` fields on Profile, resolved at launch, injected as `ARC_AGENT_INSTRUCTIONS` env var; `arc instructions` CLI (show/set/edit/clear) - [x] **OpenAI-compatible providers** — `openai-compat` auth type + `ProviderConfig` on Profile (baseUrl, model, apiKeyEnvVar); 7 presets (OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, DeepSeek); `arc provider` CLI (set/show/clear/presets) - [ ] **Team/shared config** — repo-checked config with local secret overlays -- [ ] **Backup/export/import** — move profiles and settings between machines +- [x] **Backup/export/import** — `arc backup create/restore/list` (gzipped archive of `~/.arc/`, credentials excluded by default) + `arc profile export` / `arc profile import-file` (single-profile JSON transport with inlined instructions) - [x] **Managed updates** — self-update system with npm registry check and TUI update banner ## Priority 4 — Observability & Polish -- [ ] **Launch history on Dash** — recent launches list (`{ profile, tool, timestamp }`) in `~/.arc/history.json`, displayed on Dash after first session +- [x] **Launch history on Dash** — `~/.arc/history.json` records each launch (profile, tool, timestamp, outcome, exitCode); DashView RightColumn shows recent launches + recent activity log entries (polled) - [x] **Shared layer visibility** — SettingsView shows per-profile sync details; ProfileList shows shared indicator column -- [ ] **Toast notifications** — brief auto-dismiss messages for confirmations/errors that work across all views -- [ ] **Interactive sidebar queue** — Enter on sidebar profile list to quick-launch without switching views +- [x] **Toast notifications** — `ToastProvider` + `useToast()` hook with auto-dismiss (2.5s); `ToastContainer` mounted in Dashboard +- [x] **Interactive sidebar queue** — combined nav+profile selection in Sidebar; `↑/↓` cycles through nav items then profiles; Enter on a profile row quick-launches 
without switching views - [x] **MCP server management** — MCP host manager with connect/disconnect/list/getTools + callTool with risk classification (Phase 8) - [x] **Policy layer** — three-tier permission model (coordinator/interactive/worker) with deny > ask > allow precedence (Phase 20) -- [ ] **Profile cloning/duplication** — create a new profile from an existing one as template +- [x] **Profile cloning/duplication** — `cloneProfile()` core fn + `arc profile clone [--no-copy-dir]` CLI + `Shift+C` inline clone in ProfilesView - [x] **Usage/audit log** — structured JSONL log with `arc logs` CLI, level/component/profile filtering (Phase 3) ## v2.0 Spec Features (All 25 Phases Complete) @@ -213,8 +213,3 @@ These items from the original v0.1 backlog are still open: - [ ] Profile search/filter in Profiles view - [ ] Environment preview before launch - [ ] Team/shared config (repo-checked config with local secret overlays) -- [ ] Backup/export/import (move profiles between machines) -- [ ] Launch history on Dash -- [ ] Toast notifications -- [ ] Interactive sidebar queue -- [ ] Profile cloning/duplication From c16c1a20c8539f0435d24921ba7866f892bd1554 Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sat, 18 Apr 2026 08:22:46 -0400 Subject: [PATCH 04/17] docs: Add plan for AI chat + full roundtable integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture decisions (approved): - Permission default: supervised - Backend: CLI-spawn (not HTTP LLM client) — orchestrate claude/codex/gemini CLIs via Agent-Forge pattern; MCP is the tool-use interop surface - Session storage: per profile - Roundtable composition: both real-profile and virtual-agent modes - Dangerous tools allowed with confirm modal, always logged Plan covers 10 phases (0, 0.5, 1-9) through Phase 9 docs + 0.4.0 release. Phase 0.5 adds launchMode toggle (native/worker) so Claude's native TUI chrome (statusLine, etc.) renders when not orchestrating. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/plans/ai-and-roundtable.md | 452 ++++++++++++++++++++++++++++++++ 1 file changed, 452 insertions(+) create mode 100644 docs/plans/ai-and-roundtable.md diff --git a/docs/plans/ai-and-roundtable.md b/docs/plans/ai-and-roundtable.md new file mode 100644 index 0000000..86a9d82 --- /dev/null +++ b/docs/plans/ai-and-roundtable.md @@ -0,0 +1,452 @@ +# Plan: AI Chat + Full Roundtable Integration + +**Status:** Approved — revised 2026-04-18, starting Phase 0.5 + Phase 1 +**Last updated:** 2026-04-18 +**Owner:** Bailey + +## Decisions (approved 2026-04-18) + +1. **Permission default:** `supervised` — writes require confirmation, dangerous tools allowed with explicit confirm. +2. **Backend approach (revised):** **CLI-spawn, not HTTP.** The dashboard AI and roundtable orchestrator spawn the profile's actual CLI tool (`claude`, `codex`, `gemini`) as a child process — same pattern as Agent-Forge. Prompts are delivered via configured input method (sendKeys / pasteFromFile / direct arg); responses captured via stdout. Tool use flows through MCP injected at launch (the three `mcpMode` variants). **No direct HTTP LLM client is built** — we orchestrate the existing agents' own tool use. +3. **License:** Agent-Forge is Bailey's project; copy freely with attribution comments. +4. **Session storage:** per profile — `~/.arc/profiles/<name>/chat-sessions/`. +5. **Roundtable composition:** support **both** real-profile agents (each agent = its own ARC profile) and virtual agents (N role-differentiated agents all using the same profile). +6. **Dangerous tool scope:** allowed in dashboard AI with explicit confirm modal; always logged to activity.log regardless of mode. + +--- + +## Goal + +Ship three interlocking capabilities that let end users fully leverage ARC: + +1.
**Dashboard AI chat** — a chat panel in the web dashboard that uses the user's chosen ARC profile's provider, has deep knowledge of ARC's features/state/config, and can **act** on ARC (create/clone profiles, configure providers, import/export, run doctor, start roundtables, etc.) via a tool-use layer. +2. **Full roundtable feature** — promote the existing `roundtable` hook from a state-tracking hook into a first-class feature with CLI (`arc roundtable`), MCP tool, and dashboard UI. Preserve the hook's state machine; add the missing orchestrator loop. +3. **Multi-agent pipelines from the dashboard** — UI to configure and run multi-agent flows (roundtable, PLAN→EXEC→VERIFY, consensus gates) with live progress, transcript, and outcome. + +Surfaces required for all three: **CLI + MCP + Dashboard**. + +--- + +## Ground Truth (from recon) + +### What ARC has today + +| Surface | Status | +|---|---| +| Roundtable hook (`packages/core/src/hooks/roundtable.ts`, 580 lines, priority 50) | Production-quality turn/state/mode machinery. Zero test coverage. No driver loop. | +| Interagent routing (bypass during active roundtable) | Working, tested. | +| Adapters (Claude/Gemini/Codex/OpenClaw/Hermes/openai-compat) | Process-spawn only. **No direct LLM calls anywhere.** | +| `ProviderConfig` on Profile (baseUrl, model, apiKeyEnvVar, displayName) | Stored, never used for HTTP calls. | +| `LLMCompleteFn` placeholder type in `completion-auditor.ts:42` | Intentional stub for "future M004 milestone". | +| MCP server (`@axiom-labs/arc-mcp`) | 5 tools: classify_risk, audit_completion, expand_intent, derive_completion, explain_trace. Clean authoring pattern. Stdio + HTTP transport. | +| Dashboard (`packages/dashboard/`) | Raw `node:http` + hand-rolled RFC6455 WS. Vanilla JS SPA. `ws.broadcast()` is all-clients only. Clean route registration pattern. | +| Dark Factory Mode | State machine (idle→planning→executing→verifying→gating→completed) exists but is disconnected from roundtable. 
| + +### What Agent-Forge has (at `C:\Users\Bailey\Desktop\Open-Projects\agent-forge`) + +| Component | Port decision | +|---|---| +| `AgentDeliveryPolicy` + `computeAdaptiveGraceMs` + `updateReplyLatencyAverage` (`lib/agent-delivery.ts`) — model-aware adaptive pacing with EMA latency tracking | **Port verbatim** — zero deps, pure functions, directly useful | +| `StagedWorkflowManager` (`lib/staged-workflow.ts`) — PLAN→EXEC→VERIFY state machine with cursor-based message polling | **Port** as generic pipeline primitive | +| `AgentWatchdog` (`lib/agent-watchdog.ts`) — stall detection, nudge at 3min, mark stalled at 5min, decision messages | **Port** adapted to ARC's process model | +| 6-tool MCP contract: `team_say` / `team_read` / `team_status` / `team_done` / `team_plan` / `team_ask` | **Port contract**, reimplement on `@modelcontextprotocol/sdk` | +| `agents.json` + `mcpMode` variants (`config-file` / `mcp-add` / `config-args`) | **Absorb as tribal knowledge** into ARC adapter layer | +| `collab-templates.json` (4 role templates) | **Reimplement** as ARC roundtable templates | +| REST server, React dashboard, RBAC, tmux runtime | **Skip** — wrong fit | + +### Critical Gap + +**Neither ARC nor Agent-Forge has a direct LLM client.** Every feature the user wants (chat, headless roundtable, consensus pipelines) requires a programmatic way to prompt an agent and capture its reply. Closing that gap is the blocker for all phases; per Decision 2 it is closed with the CLI-spawn `AgentClient` (AD-1), not with a new HTTP client. + +--- + +## Architectural Decisions + +### AD-1: CLI-spawn agent client (revised) + +**Decision:** do not build a direct HTTP LLM client. Instead, build an `AgentClient` abstraction that spawns the profile's CLI tool (`claude`, `codex`, `gemini`) with an input prompt and captures its response — the Agent-Forge pattern. + +Why: +- ARC's whole investment is in CLI adapters; reuse it. +- The agent tools already have their own streaming, tool use, and MCP integration — we orchestrate, not reimplement. 
+- MCP is the clean interop surface: inject ARC's tool server at spawn time, every agent (Claude / Codex / Gemini) can call ARC tools through the same contract. +- No per-provider auth reinvention. OAuth/API keys are already resolved by the native CLI. + +New module: `packages/core/src/agent-client/`. + +```typescript +interface AgentClient { + // One-shot: send a prompt, stream response until the agent signals done + send(prompt: string, opts?: { + mcpConfig?: McpConfigInjection; + instructions?: string; + signal?: AbortSignal; + }): AsyncIterable<AgentChunk>; + shutdown(): Promise<void>; +} + +type AgentChunk = + | { type: "text"; content: string } + | { type: "tool_call"; tool: string; input: unknown } + | { type: "tool_result"; tool: string; result: unknown } + | { type: "done"; reason: "end_turn" | "max_turns" | "stop" }; +``` + +Three implementations, one per tool, each derived from Agent-Forge's `agents.json` entries: +- **`ClaudeAgentClient`** — `claude` binary, `--mcp-config <path>` injection, stdout parser +- **`CodexAgentClient`** — `codex` binary, `-c mcp.servers.arc={json}` injection +- **`GeminiAgentClient`** — `gemini` binary, `gemini mcp add` pre-launch + +Dispatcher: `getAgentClientForProfile(profile): AgentClient` — picks by `profile.tool`. + +Input delivery methods ported from Agent-Forge (`inputMethod` field): +- `sendKeys` — line-by-line stdin write +- `pasteFromFile` — write prompt to temp file, send `/paste <path>` command +- `direct` — pass as CLI arg (one-shot non-TUI mode) + +For the first cut, use each tool's **one-shot non-TUI mode** where possible (`claude -p "<prompt>"`, `gemini -p "<prompt>"`, `codex exec --json`). This sidesteps TUI capture complexity. Upgrade to persistent TTY sessions in a later phase if needed for multi-turn roundtables. 
+ +### AD-2: Tool Registry + agent loop + +Separate from the agent client: + +```typescript +interface Tool { + name: string; + description: string; + schema: z.ZodSchema; + permission: "read" | "write" | "dangerous"; + handler: (input: unknown, ctx: ToolContext) => Promise<unknown>; +} + +class ToolRegistry { + register(tool: Tool): void; + getSchemas(filter?: (t: Tool) => boolean): ToolDefinition[]; + async execute(name: string, input: unknown, ctx: ToolContext): Promise<unknown>; +} +``` + +Agent loop (`runAgent(client, registry, prompt, mode)`): +1. Send prompt + tool schemas to client +2. For each chunk: if text → emit; if tool_call → execute via registry, gate by permission mode, append tool_result to conversation, loop +3. Stop on end_turn + +Three permission modes: +- `read-only` — only `read` tools available +- `supervised` (default) — `write` tools require user confirmation via UI; `dangerous` tools require an explicit confirm modal (see Decisions 1 and 6) +- `autonomous` — all tools available, all writes logged to `activity.log` + +### AD-3: ARC tool set + +Core tools (all map to existing CLI handlers or core functions): + +**Read:** `list_profiles`, `show_profile`, `get_active_profile`, `list_launches`, `query_logs`, `doctor_report`, `list_mcp_servers`, `list_skills`, `list_memories`, `list_tasks`, `list_remote_agents`, `get_arc_feature` (returns info about any ARC feature from a bundled knowledge index) + +**Write:** `create_profile`, `clone_profile`, `switch_active_profile`, `set_profile_flags`, `set_instructions`, `configure_provider`, `backup_create`, `profile_export`, `profile_import`, `mcp_connect`, `delegate_task` + +**Dangerous:** `delete_profile`, `backup_restore`, `prune`, `mcp_tool_call` (calling arbitrary MCP tools) + +**Meta:** `start_roundtable`, `run_pipeline` (PLAN→EXEC→VERIFY) + +### AD-4: Roundtable as hook + orchestrator + +Keep the existing hook. Add `RoundtableOrchestrator` that: + +1. Accepts `{ topic, agents: { profile, role }[], rounds, synthesizer }` +2. 
Initializes state via existing hook (triggering with `@roundtable` prefix) +3. Loops: read current turn from `RoundtableState`, get that profile's `AgentClient` (via `getAgentClientForProfile`), call with built prompt (role + transcript so far), post response back into `HookBus.runPost()` to advance state +4. On state transition to `"synthesizing"`: call designated synthesizer with structured prompt requesting consensus score + summary +5. Returns `{ transcript, synthesis, consensus: 0-1, durationMs }` + +Uses Agent-Forge's `AgentDeliveryPolicy` for between-turn pacing. + +### AD-5: Dashboard per-session streaming + +Extend WS server: +- Add `sessionId` negotiation on connect (client sends `{ type: "hello", sessionId: "uuid" }`) +- `ws.broadcastTo(sessionId, event, data)` method +- `ws.broadcast()` preserved (no sessionId filter = all clients) + +Chat streaming uses `broadcastTo` — text chunks stream to only the originating session. Roundtable runner uses `broadcast` — all viewers see live progress. + +### AD-6: Knowledge endowment + +Build-time + runtime system prompt composition: + +**Static** (baked at build): +- ARC purpose + architecture summary (~300 words) +- Command reference (extracted from `cli.ts` via codegen, ~50 commands × one-line desc) +- Tool catalog (auto-generated from `ToolRegistry`) +- Links to doc pages (for the AI to cite) + +**Runtime** (per chat session): +- Active profile + provider + model +- Profile count, last 3 launches, any failing doctor checks +- Current ARC version +- Warning if shared layer has unresolved conflicts + +No embeddings, no vector DB. Scope is bounded enough that a well-curated prompt beats retrieval. + +### AD-7: License + porting hygiene + +- Resolved by Decision 3 (Agent-Forge is Bailey's project): copy with an attribution comment. The general rule still applies to any other upstream: check `agent-forge/LICENSE` before copying any code. If MIT/Apache-compatible: copy with attribution comment pointing to upstream file path. If GPL or proprietary: reimplement from the design, not the code. 
+- Put ported code in clearly-named files (`packages/core/src/orchestration/delivery-policy.ts`) with a top-of-file comment: `// Ported from agent-forge/lib/agent-delivery.ts — see docs/plans/ai-and-roundtable.md AD-7`. + +--- + +## Phased Delivery + +### Phase 0 — Scaffolding +**Deliverables:** +- Create `packages/core/src/agent-client/` + `packages/core/src/orchestration/` + `packages/core/src/knowledge/` with placeholder index files +- Port `agents.json` → `packages/core/src/agent-client/registry.ts` as a typed constant (Claude, Codex, Gemini entries with `command`, `flags`, `readyMarker`, `inputMethod`, `mcpMode`, `promptDelivery`) +- Stub `AgentClient` interface in `types.ts` + +**Exit criteria:** directory structure + types in place, clean build + +--- + +### Phase 0.5 — Launch hygiene (native vs orchestrated) + +**Context:** Currently adapters use `spawnManagedProcess()` which captures stdout for monitoring — this puts tools in "worker mode" and prevents their native TUI chrome (e.g., Claude's statusLine) from rendering. Users need the option to launch a tool in its full native experience. 
+ +**Deliverables:** +- [ ] Add `launchMode?: "native" | "worker"` to `Profile` type (default `"native"`) +- [ ] `native` mode: use `spawnSync` with inherited stdio (full TTY handoff, ARC TUI exits) — same as the existing fallback path in `launch.ts:511-526` +- [ ] `worker` mode: keep existing `spawnManagedProcess` path (for roundtable, team sessions, programmatic orchestration) +- [ ] `arc launch --native` / `--worker` CLI flags override profile setting +- [ ] Doctor check: detect deprecated `CLAUDE_CODE_NO_FLICKER=1` in env, warn + hint "v2.1.110+ uses `/tui fullscreen` — unset this var" +- [ ] ProfilesView: show launch mode in detail pane; `m` key toggles native/worker +- [ ] Update docs (`user-docs/guide/profiles.md`) with the two modes + +**Acceptance:** +- `arc launch claude-profile` (native default) → Claude paints its own TUI with statusLine +- `arc launch claude-profile --worker` → Claude runs under ARC supervision for orchestration +- Roundtable orchestrator (Phase 5) forces worker mode regardless of profile setting +- Doctor flags stale `CLAUDE_CODE_NO_FLICKER` + +**Non-blocking:** can ship independently of the rest of the plan. 
+ +--- + +### Phase 1 — Agent client (CLI-spawn) foundation +**Deliverables:** +- [ ] `packages/core/src/agent-client/types.ts` — `AgentClient`, `AgentChunk`, `McpConfigInjection`, `InputMethod` +- [ ] `packages/core/src/agent-client/claude.ts` — one-shot mode: `claude -p "" --output-format stream-json --mcp-config `; line-parse `stream-json` output into `AgentChunk` +- [ ] `packages/core/src/agent-client/codex.ts` — one-shot mode: `codex exec --json` with prompt on stdin; parse JSON event stream +- [ ] `packages/core/src/agent-client/gemini.ts` — one-shot mode: `gemini -p ""`; plain text capture (no structured tool events — tool use surfaced via MCP server side-channel) +- [ ] `packages/core/src/agent-client/dispatch.ts` — `getAgentClientForProfile(profile): AgentClient` +- [ ] `packages/core/src/agent-client/mcp-injection.ts` — writes temp MCP config per `mcpMode` variant +- [ ] Unit tests: mock child process, verify prompt delivery + chunk parsing for each client +- [ ] Export from `packages/core/src/index.ts` + +**Acceptance:** +- With a Claude profile + API key or OAuth, `agentClient.send("list 3 facts about TypeScript")` yields text chunks and a `{type:"done"}` terminator +- Same for Codex and Gemini profiles +- MCP config injection writes to the right location per agent (validated by inspecting the temp file) +- Typecheck + build + tests clean + +**Blocks:** Phases 2, 4, 5, 6, 7, 8 + +--- + +### Phase 2 — Tool registry + agent loop +**Deliverables:** +- [ ] `packages/core/src/agent/tools.ts` — `Tool`, `ToolRegistry`, `ToolContext` +- [ ] `packages/core/src/agent/loop.ts` — `runAgent(client, registry, ctx)` generator +- [ ] `packages/core/src/agent/arc-tools.ts` — ARC tool definitions (list_profiles, clone_profile, etc.) 
wired to existing handlers +- [ ] Permission gating: `read-only` / `supervised` / `autonomous` modes with confirmation callback +- [ ] Unit tests for loop: mock client emitting tool_use, verify registry dispatch + result injection + +**Acceptance:** +- Agent loop can answer "what profiles do I have?" using `list_profiles` tool +- Supervised mode blocks `clone_profile` until confirm callback returns true +- 20+ ARC tools wired and callable + +--- + +### Phase 3 — Knowledge endowment +**Deliverables:** +- [ ] `packages/core/src/knowledge/index.ts` — static knowledge object (ARC purpose, architecture, command ref, doc links) +- [ ] `scripts/build-command-ref.js` — codegen script reading `cli.ts` to extract commands into a TS constant (run in `prebuild`) +- [ ] `packages/core/src/knowledge/runtime.ts` — `buildSystemPrompt(ctx)` composing static + live state snapshot +- [ ] `packages/core/src/knowledge/feature-index.ts` — structured feature catalog from FEATURES.md + `get_arc_feature` tool implementation + +**Acceptance:** +- System prompt is deterministic, reproducible, under 4000 tokens +- Live snapshot section reflects current config within 10s of change + +--- + +### Phase 4 — CLI surface: `arc chat` +**Deliverables:** +- [ ] `packages/cli/src/commands/chat.ts` — interactive terminal chat using `readline`, streams to stdout +- [ ] Flags: `--profile ` (override active), `--mode read-only|supervised|autonomous`, `--once ` (one-shot), `--no-tools` +- [ ] CLI registration in `cli.ts` +- [ ] Integration test: one-shot mode with a fake LLM client + +**Acceptance:** +- `arc chat` opens REPL using active profile's LLM client +- `arc chat --once "list my profiles"` returns tool-call-driven answer and exits +- Supervised mode shows confirmation prompts in terminal + +--- + +### Phase 5 — Roundtable orchestrator +**Deliverables:** +- [ ] `packages/core/src/orchestration/delivery-policy.ts` — port `AgentDeliveryPolicy` + `computeAdaptiveGraceMs` + EMA latency +- [ ] 
`packages/core/src/orchestration/staged-workflow.ts` — port `StagedWorkflowManager` (PLAN/EXEC/VERIFY) +- [ ] `packages/core/src/orchestration/roundtable.ts` — `RoundtableOrchestrator` driving the existing hook +- [ ] Watchdog port: `packages/core/src/orchestration/watchdog.ts` +- [ ] Tests: roundtable with 3 mocked agents, state progression, synthesis, consensus score +- [ ] First tests for the roundtable hook itself (fill the coverage gap) + +**Acceptance:** +- `RoundtableOrchestrator.run({ topic, agents, rounds: 2 })` produces a full transcript + synthesis with consensus float +- Adaptive pacing reduces throttling for fast providers +- Roundtable hook now has ≥ 80% line coverage + +--- + +### Phase 6 — `arc roundtable` CLI + MCP tools +**Deliverables:** +- [ ] `packages/cli/src/commands/roundtable.ts` — `arc roundtable --agents --rounds 2` +- [ ] Streaming transcript to terminal with per-agent color coding +- [ ] `packages/mcp/src/tools/roundtable.ts` — `arc_roundtable` MCP tool +- [ ] `packages/mcp/src/tools/chat.ts` — `arc_chat` MCP tool (one-shot, no streaming) +- [ ] `packages/mcp/src/tools/team/` — port 6-tool contract (`team_say`, `team_read`, etc.) for inter-agent comms in team sessions + +**Acceptance:** +- `arc roundtable "should we rewrite X?" 
--agents fast-opus,claude-sonnet,codex` produces usable transcript +- MCP inspector shows new tools; invoking them works end-to-end +- Existing 5 MCP tools still pass integration tests + +--- + +### Phase 7 — Dashboard chat view +**Deliverables:** +- [ ] `packages/dashboard/src/ws.ts` — add `sessionId` negotiation + `broadcastTo(sessionId, event, data)` +- [ ] `packages/dashboard/src/api.ts` — new `POST /api/chat/message` endpoint; emits chunks via `broadcastTo` +- [ ] `packages/dashboard/public/components/chat.js` — chat view with message list, streaming incoming chunks, tool-call visualization +- [ ] Sidebar: add "Chat" item +- [ ] Settings panel: permission mode toggle (`read-only` / `supervised` / `autonomous`) +- [ ] Confirmation modal for supervised writes + +**Acceptance:** +- End user opens dashboard, picks a profile, chats about ARC +- Tool calls render as expandable panels showing input + result +- Clone/export/backup actions work through chat with confirmations +- Session history persists across page reload (stored per profile in `~/.arc/profiles/<name>/chat-sessions/`, per Decision 4) + +--- + +### Phase 8 — Dashboard roundtable + pipelines view +**Deliverables:** +- [ ] `packages/dashboard/public/components/roundtable.js` — configure roundtable (topic, agents from profile picker, rounds), start, watch live transcript, see synthesis +- [ ] `packages/dashboard/public/components/pipelines.js` — configure staged workflow (PLAN→EXEC→VERIFY), watch phase progression, see phase messages +- [ ] `POST /api/roundtable/run` + `POST /api/pipeline/run` endpoints with WS broadcast updates +- [ ] Persist past runs to `~/.arc/roundtables/<id>.json` and `~/.arc/pipelines/<id>.json` with a history list + +**Acceptance:** +- User configures + runs a 2-round roundtable from dashboard +- Live updates via WS, no polling +- History view shows past runs with result summary + +--- + +### Phase 9 — Docs + polish +**Deliverables:** +- [ ] `user-docs/` page: "AI Chat Guide" (what it can do, permission modes, safety) +- [ ] 
`user-docs/` page: "Running Roundtables" (CLI + dashboard examples) +- [ ] `user-docs/` page: "Multi-Agent Pipelines" (PLAN/EXEC/VERIFY pattern) +- [ ] FEATURES.md updates: mark new items shipped +- [ ] DEVLOG.md entry summarizing design choices +- [ ] Version bump: 0.3.0 → 0.4.0 (minor, new features) + +**Acceptance:** +- Docs buildable, linked from nav +- Version bump consistent across CLI + site + +--- + +## Open Questions — all answered 2026-04-18 + +See **Decisions** section at the top of this doc. + +--- + +## Out of Scope (explicitly) + +- Embedding/vector store for doc retrieval — bounded domain, skip. +- Fine-tuning or custom models — providers handle this upstream. +- Voice chat or image input — text only for v1. +- Dashboard authentication beyond the existing token — chat inherits the dashboard's auth model, no new identity layer. +- Multi-user chat or shared sessions — single-user context. +- Running roundtables across machines — localhost only for v1. Remote agents (Phase 24) already handle cross-machine agent registry but orchestration stays local. + +--- + +## Progress Tracking + +Update checkboxes in-place as phases complete. Add a `Completed YYYY-MM-DD` marker at the bottom of each phase. 
+ +### Phase 0 — Scaffolding +- [ ] Not started + +### Phase 0.5 — Launch hygiene (native vs orchestrated) +- [ ] Not started + +### Phase 1 — Agent client (CLI-spawn) foundation +- [ ] Not started + +### Phase 2 — Tool registry + agent loop +- [ ] Not started + +### Phase 3 — Knowledge endowment +- [ ] Not started + +### Phase 4 — CLI `arc chat` +- [ ] Not started + +### Phase 5 — Roundtable orchestrator +- [ ] Not started + +### Phase 6 — `arc roundtable` CLI + MCP tools +- [ ] Not started + +### Phase 7 — Dashboard chat view +- [ ] Not started + +### Phase 8 — Dashboard roundtable + pipelines +- [ ] Not started + +### Phase 9 — Docs + polish +- [ ] Not started + +--- + +## Risk Register + +| Risk | Likelihood | Impact | Mitigation | +|---|---|---|---| +| Agent-Forge license incompatibility | Low | Medium | Resolved by Decision 3 (Bailey's own project); keep attribution comments on ported files | +| CLI stream-output parsing bugs across agent tools (format drift between versions) | Medium | Medium | Test matrix against the 3 agent CLIs (Claude, Codex, Gemini) before Phase 7 | +| Tool schemas grow unwieldy | Medium | Low | Auto-generate from existing zod schemas on CLI commands where possible | +| Dashboard WS broadcast refactor breaks existing views | Low | High | Preserve `broadcast()` as alias for broadcast-to-all; add `broadcastTo()` alongside | +| Chat context window blown by tool results | Medium | Medium | Truncate large tool results (default 4KB); summarize after N turns using context-manager (Phase 19 infra already exists) | +| Roundtable LLM costs balloon | Medium | Low | Default to 2 rounds, surface cost estimate before run, allow `--dry-run` | +| Users abuse autonomous mode, lose data | Low | High | Ship supervised as default; `arc config` flag required to enable autonomous; prominent disclaimer | + +--- + +## References + +- Recon: `C:\Users\Bailey\Desktop\Open-Projects\agent-forge\` (see Ground Truth section) +- Roundtable hook: `packages/core/src/hooks/roundtable.ts` +- Interagent routing: 
`packages/core/src/hooks/interagent-routing.ts` +- Hook bus: `packages/core/src/hooks/create-default-bus.ts` +- Profile types: `packages/core/src/types.ts:35-59` +- Dashboard server: `packages/dashboard/src/server.ts` +- Dashboard WS: `packages/dashboard/src/ws.ts` +- MCP server: `packages/mcp/src/server.ts` +- MCP tool pattern: `packages/mcp/src/tools/classify-risk.ts` From 6ff876b1453f64fa58c783232710116449bdeb9f Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sat, 18 Apr 2026 08:37:45 -0400 Subject: [PATCH 05/17] feat: launch mode toggle + agent client foundation (Phase 0/0.5/1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 0.5 — Launch mode toggle (native vs worker): - Profile.launchMode ('native' | 'worker', default native) lets users pick between full TTY handoff (spawnSync + inherit) or ARC-supervised worker mode (spawnManagedProcess). Native mode lets Claude paint its own TUI (statusLine, slash commands, etc.); worker mode is for orchestration. - arc launch --native / --worker CLI flags override the profile. - Doctor warns on deprecated CLAUDE_CODE_NO_FLICKER env var. - ProfilesView 'm' keybind toggles mode inline. - LaunchOptions extended so Phase 5+ orchestrators can force worker mode. Phase 0+1 — Agent client (CLI-spawn) foundation: - New packages/core/src/agent-client/: AgentClient interface, per-tool clients (Claude claude -p --output-format stream-json, Codex codex exec --json, Gemini gemini -p), registry ported from agent-forge's agents.json with mcpMode variants. - MCP config injection per mode: config-file (Claude), config-args (Codex TOML-literal), mcp-add pre-launch (Gemini). - Stream parsers tolerate version drift (Codex kind/type discriminators, Claude event envelope unwrap). - 48 unit tests covering parsers, registry, MCP injection, dispatcher. Unblocks Phase 2 (tool registry + agent loop), Phase 4 (arc chat), Phase 5 (roundtable orchestrator), and the Phase 7 dashboard chat. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/cli/src/cli.ts | 24 +- packages/cli/src/commands/doctor.ts | 17 ++ packages/cli/src/commands/launch.ts | 84 ++++-- packages/cli/src/tui/views/ProfilesView.tsx | 25 +- packages/core/src/adapters/types.ts | 6 + packages/core/src/agent-client/README.md | 116 +++++++++ packages/core/src/agent-client/claude.ts | 83 ++++++ packages/core/src/agent-client/codex.ts | 76 ++++++ packages/core/src/agent-client/dispatch.ts | 77 ++++++ packages/core/src/agent-client/gemini.ts | 73 ++++++ packages/core/src/agent-client/index.ts | 43 +++ .../core/src/agent-client/mcp-injection.ts | 174 +++++++++++++ packages/core/src/agent-client/registry.ts | 67 +++++ .../core/src/agent-client/spawn-helpers.ts | 157 +++++++++++ .../core/src/agent-client/stream-parsers.ts | 246 ++++++++++++++++++ packages/core/src/agent-client/types.ts | 117 +++++++++ packages/core/src/index.ts | 1 + packages/core/src/types.ts | 8 + tests/integration/profile-inheritance.test.ts | 4 +- tests/unit/agent-client/dispatch.test.ts | 76 ++++++ tests/unit/agent-client/mcp-injection.test.ts | 114 ++++++++ tests/unit/agent-client/registry.test.ts | 53 ++++ .../unit/agent-client/stream-parsers.test.ts | 209 +++++++++++++++ user-docs/guide/profiles.md | 42 +++ 24 files changed, 1863 insertions(+), 29 deletions(-) create mode 100644 packages/core/src/agent-client/README.md create mode 100644 packages/core/src/agent-client/claude.ts create mode 100644 packages/core/src/agent-client/codex.ts create mode 100644 packages/core/src/agent-client/dispatch.ts create mode 100644 packages/core/src/agent-client/gemini.ts create mode 100644 packages/core/src/agent-client/index.ts create mode 100644 packages/core/src/agent-client/mcp-injection.ts create mode 100644 packages/core/src/agent-client/registry.ts create mode 100644 packages/core/src/agent-client/spawn-helpers.ts create mode 100644 packages/core/src/agent-client/stream-parsers.ts create mode 100644 
packages/core/src/agent-client/types.ts create mode 100644 tests/unit/agent-client/dispatch.test.ts create mode 100644 tests/unit/agent-client/mcp-injection.test.ts create mode 100644 tests/unit/agent-client/registry.test.ts create mode 100644 tests/unit/agent-client/stream-parsers.test.ts diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 256a62c..0f14cfc 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -208,6 +208,8 @@ export function createProgram(): Command { .command("launch [name]") .description("Launch agent tool with a profile") .option("-d, --dashboard", "Start web dashboard alongside agent") + .option("--native", "Run the tool with full TTY handoff (ARC exits, tool paints its own TUI)") + .option("--worker", "Run the tool under ARC supervision (stdout captured for orchestration)") .passThroughOptions() .allowUnknownOption() .allowExcessArguments() @@ -216,8 +218,19 @@ export function createProgram(): Command { ` All flags after the profile name are forwarded to the agent tool. +Launch modes: + --native Full TTY handoff (default). ARC exits; tool paints its own TUI + (e.g. Claude's statusLine). Best for daily interactive use. + --worker Run under ARC supervision. Stdout captured for monitoring / + orchestration (roundtable, pipelines). Suppresses native TUI chrome. + +If neither flag is given, the profile's \`launchMode\` setting is used +(fallback: native). + Examples: $ arc launch work + $ arc launch work --native + $ arc launch work --worker $ arc launch work --model sonnet $ arc launch work --dashboard $ arc launch work --dangerously-skip-permissions @@ -228,11 +241,18 @@ Examples: .action( async ( name: string | undefined, - opts: { dashboard?: boolean }, + opts: { dashboard?: boolean; native?: boolean; worker?: boolean }, cmd: Command ) => { + // Re-inject parsed launch-mode flags into the args array so handleLaunch can see them. 
+ // (Commander strips recognized options, but handleLaunch's CLI-flag extractor expects + // them in the raw-args stream.) + const extraArgs: string[] = []; + if (opts.native) extraArgs.push("--native"); + if (opts.worker) extraArgs.push("--worker"); + const mergedArgs = extraArgs.length > 0 ? [...cmd.args, ...extraArgs] : cmd.args; const mod = await import("./commands/launch.js"); - await mod.handleLaunch(name, cmd.args, { dashboard: opts.dashboard }); + await mod.handleLaunch(name, mergedArgs, { dashboard: opts.dashboard }); } ); diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index 7860570..7ea9ada 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -226,6 +226,22 @@ async function checkProfiles(): Promise { // ── Main Handler ──────────────────────────────────── +/** + * Diagnostic: env.deprecated_no_flicker + * Flags the deprecated CLAUDE_CODE_NO_FLICKER env var. v2.1.110+ of Claude Code + * uses `/tui fullscreen` instead; leaving this var set can cause odd rendering. 
+ */ +function checkDeprecatedEnv(): void { + if (process.env["CLAUDE_CODE_NO_FLICKER"] !== undefined) { + warning( + "CLAUDE_CODE_NO_FLICKER is deprecated (v2.1.110+ uses /tui fullscreen)" + ); + console.log( + ` ${pc.dim("Repair: Remove CLAUDE_CODE_NO_FLICKER from your shell profile")}` + ); + } +} + function checkNodeVersion(): void { const version = process.version.replace(/^v/, ""); const major = parseInt(version.split(".")[0], 10); @@ -250,6 +266,7 @@ export async function handleDoctor(): Promise { checkConfigFile(); checkPath(); checkShellIntegration(); + checkDeprecatedEnv(); await checkProfiles(); diff --git a/packages/cli/src/commands/launch.ts b/packages/cli/src/commands/launch.ts index 1d83d9f..f2bec0d 100644 --- a/packages/cli/src/commands/launch.ts +++ b/packages/cli/src/commands/launch.ts @@ -50,7 +50,16 @@ function getInstallHint(tool: string): string { export async function handleLaunch( name: string | undefined, rawArgs: string[], - opts?: { beforeSpawn?: () => void | Promise; dashboard?: boolean } + opts?: { + beforeSpawn?: () => void | Promise; + dashboard?: boolean; + /** + * Force a specific launch mode regardless of profile setting or CLI flags. + * Used by orchestrators (Phase 5+ roundtable, pipelines) to guarantee + * `worker` mode so stdout can be captured. 
+ */ + launchMode?: "native" | "worker"; + } ): Promise { const config = loadConfig(); let profileName: string; @@ -76,6 +85,20 @@ export async function handleLaunch( passthrough = passthrough.slice(1); } + // Extract launch-mode flags from passthrough so they are not forwarded to the agent + let cliLaunchMode: "native" | "worker" | undefined; + passthrough = passthrough.filter((arg) => { + if (arg === "--native") { + cliLaunchMode = "native"; + return false; + } + if (arg === "--worker") { + cliLaunchMode = "worker"; + return false; + } + return true; + }); + // Resolve profile through workspace-aware pipeline (arc.json > explicit > activeProfile) let profile: Profile; try { @@ -91,6 +114,11 @@ export async function handleLaunch( const tool = profile.tool ?? "claude"; const enforcement = profile.enforcement ?? "log"; + // Resolve effective launch mode: caller override > CLI flag > profile setting > default native. + // Orchestrators (roundtable, pipelines) pass `opts.launchMode = "worker"` to force supervision. + const effectiveLaunchMode: "native" | "worker" = + opts?.launchMode ?? cliLaunchMode ?? profile.launchMode ?? "native"; + // ─── Session auto-resume detection ────────────────────────────────── // Lightweight: detect whether the user's launch args suggest resume intent // and whether a suspended session exists. Informational only for now. @@ -380,30 +408,35 @@ export async function handleLaunch( }); let agentProcess: AgentProcess | null = null; - try { - agentProcess = await adapter.launch(profile, { - args: allArgs, - env: profileEnv, - cwd: process.cwd(), - beforeSpawn: opts?.beforeSpawn ? async () => { await opts!.beforeSpawn!(); } : undefined, - }); - } catch (err: unknown) { - const msg = err instanceof Error ? 
err.message : String(err); - if (msg === "not implemented") { - // Adapter still has stub lifecycle — fall back to spawnSync - agentProcess = null; - } else { - // Real error from a real adapter - recordLaunch({ - profile: profileName, - tool, - timestamp: new Date().toISOString(), - outcome: "failed", + if (effectiveLaunchMode === "worker") { + // Worker mode: hand off to the adapter for managed supervision. + try { + agentProcess = await adapter.launch(profile, { + args: allArgs, + env: profileEnv, + cwd: process.cwd(), + beforeSpawn: opts?.beforeSpawn ? async () => { await opts!.beforeSpawn!(); } : undefined, + launchMode: "worker", }); - error(`Failed to launch ${tool}: ${msg}`); - process.exit(1); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + if (msg === "not implemented") { + // Adapter still has stub lifecycle — fall back to spawnSync + agentProcess = null; + } else { + // Real error from a real adapter + recordLaunch({ + profile: profileName, + tool, + timestamp: new Date().toISOString(), + outcome: "failed", + }); + error(`Failed to launch ${tool}: ${msg}`); + process.exit(1); + } } } + // Native mode falls straight through to the spawnSync path below for full TTY handoff. // ─── Finalization helper ────────────────────────────────────────── // Completes session tracking and flushes telemetry. Wrapped in try/catch @@ -514,10 +547,11 @@ export async function handleLaunch( process.exit(0); } - // ─── Legacy spawnSync path (stubbed adapters: Claude, Gemini) ──── + // ─── Native TTY handoff path (default mode + adapter-stub fallback) ─ // Use spawnSync with stdio:"inherit" — the parent blocks completely and - // the child process owns the terminal. No stdin competition, no async - // race conditions, no DEP0190 warning. + // the child process owns the terminal, so the tool can paint its own TUI + // (e.g. Claude's statusLine). No stdin competition, no async race + // conditions, no DEP0190 warning. 
// On Windows, tools are often .cmd shims that need `cmd /c` to resolve. if (opts?.beforeSpawn) { await opts.beforeSpawn(); diff --git a/packages/cli/src/tui/views/ProfilesView.tsx b/packages/cli/src/tui/views/ProfilesView.tsx index fe4147f..ada92a0 100644 --- a/packages/cli/src/tui/views/ProfilesView.tsx +++ b/packages/cli/src/tui/views/ProfilesView.tsx @@ -358,6 +358,25 @@ export function ProfilesView({ onShowInfo?.(selected.name); return; } + + // [m] toggle launch mode (native <-> worker) + if (input === "m") { + try { + const config = loadConfig(); + const profile = config.profiles[selected.name]; + if (!profile) return; + const current = profile.launchMode ?? "native"; + const next: "native" | "worker" = current === "native" ? "worker" : "native"; + profile.launchMode = next; + saveConfig(config); + showMessage(`Launch mode: ${next}`); + reload(); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + showMessage(`Toggle failed: ${msg}`); + } + return; + } }, { isActive: isActive && inputEnabled } ); @@ -386,6 +405,10 @@ export function ProfilesView({ {isSyncSource && ( {"\u2605"} sync source — shared layer auto-pulls from this profile )} + {/* Launch mode indicator */} + + launch: [{selectedConfig?.launchMode ?? "native"}] + {/* Shared layer status */} {selectedManifest ? 
( @@ -461,7 +484,7 @@ export function ProfilesView({ {!loading && action === "idle" && ( - {"\u21B5"} launch s switch i info d delete h sync shift+h push shift+s source f flags c create shift+c clone + {"\u21B5"} launch s switch i info m mode d delete h sync shift+h push shift+s source f flags c create shift+c clone )} diff --git a/packages/core/src/adapters/types.ts b/packages/core/src/adapters/types.ts index 0af1464..a2b1ccf 100644 --- a/packages/core/src/adapters/types.ts +++ b/packages/core/src/adapters/types.ts @@ -24,6 +24,12 @@ export interface LaunchOptions { env: Record; cwd?: string; beforeSpawn?: () => Promise; + /** + * Force a specific launch mode regardless of profile/CLI settings. + * Orchestrators (roundtable, pipelines) should pass `"worker"` to ensure + * the tool runs under ARC supervision. + */ + launchMode?: "native" | "worker"; } /** Handle for a running agent process. */ diff --git a/packages/core/src/agent-client/README.md b/packages/core/src/agent-client/README.md new file mode 100644 index 0000000..81946fa --- /dev/null +++ b/packages/core/src/agent-client/README.md @@ -0,0 +1,116 @@ +# agent-client — CLI-spawn Agent Foundation + +Phase 1 of the AI chat / roundtable plan. See `docs/plans/ai-and-roundtable.md` +decision **AD-1** for the why. + +## What it is + +A tiny abstraction for programmatic agent invocation: + +1. Take an ARC `Profile`. +2. Spawn the profile's native CLI tool in **one-shot mode** (no TUI). +3. Stream stdout back as structured `AgentChunk`s. +4. Optionally inject an MCP config at launch so the agent can call our tools. + +We do **not** build a direct HTTP LLM client. We orchestrate the agent tools +that already exist (`claude`, `codex`, `gemini`), letting them handle auth, +retries, streaming, and tool-use negotiation. 
+ +## Usage + +```ts +import { getAgentClientForProfile } from "@axiom-labs/arc-core"; + +const client = getAgentClientForProfile(profile); + +for await (const chunk of client.send("What profiles do I have?")) { + if (chunk.type === "text") process.stdout.write(chunk.content); + if (chunk.type === "done") break; +} + +await client.shutdown(); +``` + +### With MCP injection + +```ts +for await (const chunk of client.send("List my profiles", { + mcpConfig: { + mode: "config-file", // must match profile.tool's mcpMode + servers: { + arc: { + command: "node", + args: ["/path/to/arc-mcp-server.mjs"], + env: { ARC_AUTH_TOKEN: "..." }, + }, + }, + }, +})) { + if (chunk.type === "tool_call") console.log("tool:", chunk.tool, chunk.input); + if (chunk.type === "tool_result") console.log("result:", chunk.result); + if (chunk.type === "text") process.stdout.write(chunk.content); +} +``` + +### With instructions / abort / timeout + +```ts +const ac = new AbortController(); +setTimeout(() => ac.abort(), 30_000); + +for await (const chunk of client.send("Summarize my setup", { + instructions: "You are an ARC operator. Be concise.", + signal: ac.signal, + timeoutMs: 45_000, +})) { /* ... */ } +``` + +## MCP injection modes + +| Tool | `mcpMode` | How | +|--------|----------------|-----| +| claude | `config-file` | Write `{mcpServers:{...}}` to a temp file, pass `--mcp-config <path>`. | +| codex | `config-args` | Emit `-c mcp.servers.<name>.<key>=<value>` repeated. | +| gemini | `mcp-add` | Run `gemini mcp add --scope project ...` before launch. | + +All three mirror Agent-Forge's `agents.json` tribal knowledge. + +## Output parsing + +| Tool | stdout format | Parser | +|--------|---------------------|--------| +| claude | line-delimited JSON | `parseClaudeStreamJson` | +| codex | line-delimited JSON | `parseCodexJson` | +| gemini | plain text | `parseGeminiPlain` (text passthrough) | + +Each parser returns `AgentChunk | null` per line. Unknown event shapes return +`null` (we skip instead of crashing). 
+ +## Known gaps / TODOs + +- **System prompt.** Claude's `-p` mode has no dedicated system-prompt flag. + We synthesize one by wrapping: `System: ...\n\nUser: ...`. Same for Codex + (no separate system flag in `exec --json`). Gemini follows the same pattern + for consistency. +- **Codex event shape.** Codex's `exec --json` format has shifted between + versions; the parser accepts both `kind` and `type` discriminators and maps + the recognizable subset. Add real-binary smoke tests in Phase 4. +- **Gemini tool events.** Gemini `-p` prints plain text only; structured tool + events arrive via the MCP side-channel, not stdout. +- **Flags verification.** The exact one-shot flags (`--output-format stream-json + --verbose` for claude, `exec --json` for codex, `-p` for gemini) are drawn + from Agent-Forge + upstream docs. Verify with real binaries in Phase 4 smoke + tests before depending on them in CI. +- **Aider / opencode.** Omitted from Phase 1 — they're TUI-only with no clean + one-shot mode. + +## Files + +- `types.ts` — `AgentClient`, `AgentChunk`, `McpConfigInjection`, `AgentProgram`. +- `registry.ts` — `AGENT_PROGRAMS` ported from Agent-Forge `agents.json`. +- `mcp-injection.ts` — three injection helpers + temp-file cleanup. +- `stream-parsers.ts` — line → `AgentChunk` for each tool's output dialect. +- `spawn-helpers.ts` — internal spawn + stream primitive. +- `claude.ts`, `codex.ts`, `gemini.ts` — per-tool `AgentClient` classes. +- `dispatch.ts` — `getAgentClientForProfile`. +- `index.ts` — barrel export. diff --git a/packages/core/src/agent-client/claude.ts b/packages/core/src/agent-client/claude.ts new file mode 100644 index 0000000..e6f68c6 --- /dev/null +++ b/packages/core/src/agent-client/claude.ts @@ -0,0 +1,83 @@ +/** + * Claude one-shot agent client. 
+ * + * Invocation: + * claude -p "" --output-format stream-json --verbose [--mcp-config ] + * + * Claude's `-p` mode has no dedicated system-prompt flag, so when + * `opts.instructions` is supplied we prepend it to the prompt with a + * "System: ... / User: ..." wrapper — the cleanest way to give it + * role-separated context without a TUI session. + * + * Output is line-delimited JSON parsed by `parseClaudeStreamJson`. The + * client is one-shot: `shutdown()` is a no-op (the child exits on its own). + */ + +import type { AgentClient, AgentChunk, AgentSendOptions } from "./types.js"; +import { AGENT_PROGRAMS } from "./registry.js"; +import { writeMcpConfigFile } from "./mcp-injection.js"; +import { parseClaudeStreamJson } from "./stream-parsers.js"; +import { runAgentProcess, type SpawnFn } from "./spawn-helpers.js"; + +export interface ClaudeAgentClientOptions { + /** Optional working directory override for the child. */ + cwd?: string; + /** Extra environment merged on top of `process.env`. */ + env?: NodeJS.ProcessEnv; + /** Optional spawn override for tests. */ + spawnFn?: SpawnFn; + /** Override the binary (used by tests / Windows shims). */ + command?: string; +} + +export class ClaudeAgentClient implements AgentClient { + constructor(private readonly cfg: ClaudeAgentClientOptions = {}) {} + + send(prompt: string, opts?: AgentSendOptions): AsyncIterable { + const self = this; + return { + [Symbol.asyncIterator](): AsyncIterator { + return self.#iterate(prompt, opts ?? {}); + }, + }; + } + + async shutdown(): Promise { + // One-shot: nothing to tear down. + } + + #iterate(prompt: string, opts: AgentSendOptions): AsyncIterator { + const program = AGENT_PROGRAMS["claude"]!; + const command = this.cfg.command ?? 
program.command; + + const args = [...program.oneShotFlags]; + + if (opts.mcpConfig) { + if (opts.mcpConfig.mode !== "config-file") { + throw new Error( + `ClaudeAgentClient requires mcpConfig.mode=config-file (got ${opts.mcpConfig.mode})`, + ); + } + const file = writeMcpConfigFile(opts.mcpConfig); + args.push("--mcp-config", file); + } + + const fullPrompt = opts.instructions + ? `System: ${opts.instructions}\n\nUser: ${prompt}` + : prompt; + + // Append prompt after `-p`. Claude accepts the positional prompt last. + // The `-p` flag is already in oneShotFlags[0]. + args.push(fullPrompt); + + return runAgentProcess({ + command, + args, + env: this.cfg.env, + cwd: this.cfg.cwd, + parse: parseClaudeStreamJson, + opts, + spawnFn: this.cfg.spawnFn, + }); + } +} diff --git a/packages/core/src/agent-client/codex.ts b/packages/core/src/agent-client/codex.ts new file mode 100644 index 0000000..48d7e7a --- /dev/null +++ b/packages/core/src/agent-client/codex.ts @@ -0,0 +1,76 @@ +/** + * Codex one-shot agent client. + * + * Invocation: + * codex exec --json [-c mcp.servers..= ...] < prompt-on-stdin + * + * Codex reads the prompt from stdin and emits line-delimited JSON events + * on stdout (see `parseCodexJson`). MCP injection uses `-c` config flags. + * + * Like Claude, `instructions` is folded into the stdin payload as a + * `System: ... / User: ...` block — Codex doesn't have a separate + * system-prompt CLI flag either. 
+ */ + +import type { AgentClient, AgentChunk, AgentSendOptions } from "./types.js"; +import { AGENT_PROGRAMS } from "./registry.js"; +import { buildMcpConfigArgs } from "./mcp-injection.js"; +import { parseCodexJson } from "./stream-parsers.js"; +import { runAgentProcess, type SpawnFn } from "./spawn-helpers.js"; + +export interface CodexAgentClientOptions { + cwd?: string; + env?: NodeJS.ProcessEnv; + spawnFn?: SpawnFn; + command?: string; +} + +export class CodexAgentClient implements AgentClient { + constructor(private readonly cfg: CodexAgentClientOptions = {}) {} + + send(prompt: string, opts?: AgentSendOptions): AsyncIterable { + const self = this; + return { + [Symbol.asyncIterator](): AsyncIterator { + return self.#iterate(prompt, opts ?? {}); + }, + }; + } + + async shutdown(): Promise { + // One-shot: nothing to tear down. + } + + #iterate(prompt: string, opts: AgentSendOptions): AsyncIterator { + const program = AGENT_PROGRAMS["codex"]!; + const command = this.cfg.command ?? program.command; + + const args = [...program.oneShotFlags]; + if (opts.mcpConfig) { + if (opts.mcpConfig.mode !== "config-args") { + throw new Error( + `CodexAgentClient requires mcpConfig.mode=config-args (got ${opts.mcpConfig.mode})`, + ); + } + args.unshift(...buildMcpConfigArgs(opts.mcpConfig)); + // Codex expects `-c ...` flags before the subcommand. But `exec` is + // already in oneShotFlags, which we cloned before unshifting, so the + // order ends up: -c k=v ... exec --json. That matches codex CLI usage. + } + + const stdinPayload = opts.instructions + ? 
`System: ${opts.instructions}\n\nUser: ${prompt}` + : prompt; + + return runAgentProcess({ + command, + args, + env: this.cfg.env, + cwd: this.cfg.cwd, + stdinPayload, + parse: parseCodexJson, + opts, + spawnFn: this.cfg.spawnFn, + }); + } +} diff --git a/packages/core/src/agent-client/dispatch.ts b/packages/core/src/agent-client/dispatch.ts new file mode 100644 index 0000000..1209121 --- /dev/null +++ b/packages/core/src/agent-client/dispatch.ts @@ -0,0 +1,77 @@ +/** + * Dispatcher — pick the right `AgentClient` for a given profile. + * + * Unsupported tools (`hermes`, `openclaw`, unknown) throw a clear error so + * the caller can decide whether to fall back or surface a user-facing message. + */ + +import type { Profile } from "../types.js"; +import type { AgentClient } from "./types.js"; +import { ClaudeAgentClient } from "./claude.js"; +import { CodexAgentClient } from "./codex.js"; +import { GeminiAgentClient } from "./gemini.js"; +import { AGENT_PROGRAMS } from "./registry.js"; + +export interface DispatchOptions { + /** Working directory override. Defaults to `process.cwd()`. */ + cwd?: string; + /** Extra env on top of `profile.envOverrides`. */ + extraEnv?: NodeJS.ProcessEnv; +} + +/** + * Build the child process environment from a profile. Merges: + * 1. `process.env` + * 2. profile-level env overrides + * 3. `ARC_CONFIG_DIR` set to the profile's isolated config dir + * 4. caller-supplied `extraEnv` + */ +export function buildProfileEnv(profile: Profile, extraEnv?: NodeJS.ProcessEnv): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { ...process.env }; + if (profile.envOverrides) { + for (const [k, v] of Object.entries(profile.envOverrides)) { + env[k] = v; + } + } + if (profile.configDir) { + env["ARC_CONFIG_DIR"] = profile.configDir; + } + if (extraEnv) { + for (const [k, v] of Object.entries(extraEnv)) { + if (v !== undefined) env[k] = v; + } + } + return env; +} + +/** + * Return an `AgentClient` bound to the profile's tool, configDir, and env. 
+ * Throws for tools that don't have a CLI-spawn implementation yet. + */ +export function getAgentClientForProfile(profile: Profile, dispatch: DispatchOptions = {}): AgentClient { + const tool = profile.tool; + if (!tool) { + throw new Error(`Profile has no tool set; cannot build an AgentClient`); + } + const program = AGENT_PROGRAMS[tool]; + if (!program) { + throw new Error( + `No AgentClient implementation for tool "${tool}". Supported: ${Object.keys(AGENT_PROGRAMS).join(", ")}.`, + ); + } + + const env = buildProfileEnv(profile, dispatch.extraEnv); + const cwd = dispatch.cwd; + + switch (tool) { + case "claude": + return new ClaudeAgentClient({ cwd, env }); + case "codex": + return new CodexAgentClient({ cwd, env }); + case "gemini": + return new GeminiAgentClient({ cwd, env }); + default: + // Unreachable given the registry lookup above, but keeps TS exhaustive. + throw new Error(`Unsupported tool: ${tool}`); + } +} diff --git a/packages/core/src/agent-client/gemini.ts b/packages/core/src/agent-client/gemini.ts new file mode 100644 index 0000000..1874a1b --- /dev/null +++ b/packages/core/src/agent-client/gemini.ts @@ -0,0 +1,73 @@ +/** + * Gemini one-shot agent client. + * + * Invocation: + * gemini -p "" + * + * Gemini's `-p` mode prints plain text with no structured tool events on + * stdout — tool use flows through the MCP side-channel registered via + * `gemini mcp add ...` before launch. For this phase we just stream text + * lines and emit `{type:"done"}` on process exit. 
+ */ + +import type { AgentClient, AgentChunk, AgentSendOptions } from "./types.js"; +import { AGENT_PROGRAMS } from "./registry.js"; +import { runMcpAdd } from "./mcp-injection.js"; +import { parseGeminiPlain } from "./stream-parsers.js"; +import { runAgentProcess, type SpawnFn } from "./spawn-helpers.js"; + +export interface GeminiAgentClientOptions { + cwd?: string; + env?: NodeJS.ProcessEnv; + spawnFn?: SpawnFn; + command?: string; + /** When false, skip pre-launch `gemini mcp add` (tests). */ + autoRegisterMcp?: boolean; +} + +export class GeminiAgentClient implements AgentClient { + constructor(private readonly cfg: GeminiAgentClientOptions = {}) {} + + send(prompt: string, opts?: AgentSendOptions): AsyncIterable { + const self = this; + return { + [Symbol.asyncIterator](): AsyncIterator { + return self.#iterate(prompt, opts ?? {}); + }, + }; + } + + async shutdown(): Promise { + // One-shot: nothing to tear down. + } + + async *#iterate(prompt: string, opts: AgentSendOptions): AsyncGenerator { + const program = AGENT_PROGRAMS["gemini"]!; + const command = this.cfg.command ?? program.command; + + if (opts.mcpConfig && this.cfg.autoRegisterMcp !== false) { + if (opts.mcpConfig.mode !== "mcp-add") { + throw new Error( + `GeminiAgentClient requires mcpConfig.mode=mcp-add (got ${opts.mcpConfig.mode})`, + ); + } + await runMcpAdd(opts.mcpConfig, command); + } + + const fullPrompt = opts.instructions + ? `System: ${opts.instructions}\n\nUser: ${prompt}` + : prompt; + + const args = [...program.oneShotFlags, fullPrompt]; + + yield* runAgentProcess({ + command, + args, + env: this.cfg.env, + cwd: this.cfg.cwd, + parse: parseGeminiPlain, + opts, + spawnFn: this.cfg.spawnFn, + }); + } +} diff --git a/packages/core/src/agent-client/index.ts b/packages/core/src/agent-client/index.ts new file mode 100644 index 0000000..6207af6 --- /dev/null +++ b/packages/core/src/agent-client/index.ts @@ -0,0 +1,43 @@ +/** + * Agent client — Phase 1 public surface. 
+ * + * Re-exports types, registry helpers, per-agent clients, and the dispatcher. + * See `./README.md` for usage. + */ + +export type { + AgentClient, + AgentChunk, + AgentSendOptions, + AgentProgram, + AgentOutputFormat, + InputMethod, + McpConfigMode, + McpConfigInjection, + McpServerDef, +} from "./types.js"; + +export { AGENT_PROGRAMS, resolveAgentProgram } from "./registry.js"; + +export { + writeMcpConfigFile, + buildMcpConfigArgs, + runMcpAdd, + cleanupMcpTempFiles, +} from "./mcp-injection.js"; + +export { + parseClaudeStreamJson, + parseCodexJson, + parseGeminiPlain, +} from "./stream-parsers.js"; + +export { ClaudeAgentClient } from "./claude.js"; +export { CodexAgentClient } from "./codex.js"; +export { GeminiAgentClient } from "./gemini.js"; + +export { + getAgentClientForProfile, + buildProfileEnv, + type DispatchOptions, +} from "./dispatch.js"; diff --git a/packages/core/src/agent-client/mcp-injection.ts b/packages/core/src/agent-client/mcp-injection.ts new file mode 100644 index 0000000..ff63955 --- /dev/null +++ b/packages/core/src/agent-client/mcp-injection.ts @@ -0,0 +1,174 @@ +/** + * MCP injection helpers for the three supported modes: + * - `config-file` — write `{ mcpServers: {...} }` JSON to a temp file. + * - `config-args` — emit repeated `-c mcp.servers.<name>.<key>=<value>` args. + * - `mcp-add` — run `<binary> mcp add --scope <scope> <name> <command> [args]` + * before the main launch. + * + * See docs/plans/ai-and-roundtable.md AD-1 / AD-7. + */ + +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { spawn } from "node:child_process"; +import type { McpConfigInjection, McpServerDef } from "./types.js"; + +/** Temp files created this session, cleaned up on demand. */ +const tempFiles: string[] = []; +const tempDirs: string[] = []; + +/** + * Write the MCP injection as a JSON file suitable for Claude's `--mcp-config`. + * Returns the absolute path of the file. 
The file is tracked for later cleanup + * via `cleanupMcpTempFiles`. + * + * Throws if `injection.mode !== "config-file"` — callers must use the matching + * helper for other modes. + */ +export function writeMcpConfigFile(injection: McpConfigInjection): string { + if (injection.mode !== "config-file") { + throw new Error( + `writeMcpConfigFile requires mode=config-file (got ${injection.mode})`, + ); + } + + const dir = mkdtempSync(path.join(tmpdir(), "arc-mcp-")); + tempDirs.push(dir); + const file = path.join(dir, "mcp-config.json"); + + const payload = { mcpServers: injection.servers }; + writeFileSync(file, JSON.stringify(payload, null, 2), "utf8"); + tempFiles.push(file); + return file; +} + +/** + * Build the `-c mcp.servers.<name>.<key>=<value>` flag pairs for Codex. + * Produces flat tokens, e.g.: + * + * ["-c", "mcp.servers.arc.command='node'", + * "-c", "mcp.servers.arc.args=['...']", + * "-c", "mcp.servers.arc.env.FOO='bar'"] + * + * Values are emitted as TOML single-quoted literals (`'...'`) with `'` + * doubled (NOTE(review): TOML literal strings define no escape sequences, so a value containing `'` may not parse as intended — confirm against the Codex `-c` parser). Arrays are serialized as TOML inline arrays. + */ +export function buildMcpConfigArgs(injection: McpConfigInjection): string[] { + if (injection.mode !== "config-args") { + throw new Error( + `buildMcpConfigArgs requires mode=config-args (got ${injection.mode})`, + ); + } + + const out: string[] = []; + for (const [name, def] of Object.entries(injection.servers)) { + const base = `mcp.servers.${name}`; + out.push("-c", `${base}.command=${toTomlLiteral(def.command)}`); + + if (def.args && def.args.length > 0) { + const list = def.args.map(toTomlLiteral).join(", "); + out.push("-c", `${base}.args=[${list}]`); + } + + if (def.env) { + for (const [k, v] of Object.entries(def.env)) { + out.push("-c", `${base}.env.${k}=${toTomlLiteral(v)}`); + } + } + } + return out; +} + +/** + * Execute `<binary> mcp add --scope <scope> <name> [-e KEY=VALUE ...] <command> [args...]` for each + * server in the injection. Used by Gemini (and any other tool that registers + * servers via CLI pre-launch). 
+ */ +export async function runMcpAdd( + injection: McpConfigInjection, + binary: string, + scope: "user" | "project" = "project", +): Promise { + if (injection.mode !== "mcp-add") { + throw new Error( + `runMcpAdd requires mode=mcp-add (got ${injection.mode})`, + ); + } + + for (const [name, def] of Object.entries(injection.servers)) { + const envFlags: string[] = []; + if (def.env) { + for (const [k, v] of Object.entries(def.env)) { + envFlags.push("-e", `${k}=${v}`); + } + } + + const args = [ + "mcp", + "add", + "--scope", + scope, + name, + ...envFlags, + def.command, + ...(def.args ?? []), + ]; + + await runCmd(binary, args); + } +} + +/** + * Remove every temp file / dir created during this process lifetime. + * Safe to call more than once; errors are swallowed (best-effort cleanup). + */ +export function cleanupMcpTempFiles(): void { + for (const f of tempFiles.splice(0)) { + try { + rmSync(f, { force: true }); + } catch { + /* ignore */ + } + } + for (const d of tempDirs.splice(0)) { + try { + rmSync(d, { recursive: true, force: true }); + } catch { + /* ignore */ + } + } +} + +/** Internal: run a child process to completion. Rejects on non-zero exit. */ +function runCmd(cmd: string, args: string[]): Promise { + return new Promise((resolve, reject) => { + const child = spawn(cmd, args, { + stdio: ["ignore", "pipe", "pipe"], + shell: process.platform === "win32", + windowsHide: true, + }); + let stderr = ""; + child.stderr?.on("data", (b: Buffer) => { + stderr += b.toString("utf8"); + }); + child.on("error", reject); + child.on("close", (code) => { + if (code === 0) resolve(); + else reject(new Error(`${cmd} exited with code ${code}: ${stderr}`)); + }); + }); +} + +/** Render a value as a TOML single-quoted string literal. */ +function toTomlLiteral(value: string): string { + return `'${value.replace(/'/g, "''")}'`; +} + +// Internal accessor for tests (not exported from the package barrel). 
+export function __peekTempFiles(): { files: string[]; dirs: string[] } { + return { files: [...tempFiles], dirs: [...tempDirs] }; +} + +// Re-export type for convenience. +export type { McpServerDef }; diff --git a/packages/core/src/agent-client/registry.ts b/packages/core/src/agent-client/registry.ts new file mode 100644 index 0000000..6d9338b --- /dev/null +++ b/packages/core/src/agent-client/registry.ts @@ -0,0 +1,67 @@ +/** + * Agent program registry — ported from Agent-Forge's `agents.json`. + * See docs/plans/ai-and-roundtable.md AD-1 / AD-7. + * + * Each entry describes the one-shot CLI invocation for a known tool. + * The registry is deliberately narrow: only tools for which we can spawn a + * non-interactive process and capture structured output. TUI-only tools + * (aider, opencode) are omitted for Phase 1. + */ + +import type { AgentTool } from "../types.js"; +import type { AgentProgram } from "./types.js"; + +/** + * Typed program table. Keyed by the literal tool name stored on `Profile.tool`. + * + * Claude (`claude -p --output-format stream-json --verbose`): + * - stream-json emits line-delimited JSON per message/content_block. + * - MCP injection via `--mcp-config <path>`. + * + * Codex (`codex exec --json`): + * - Reads prompt from stdin, emits one JSON event per line. + * - MCP injection via repeated `-c mcp.servers.<name>.<key>=<value>` args. + * + * Gemini (`gemini -p <prompt>`): + * - Plain text on stdout; structured tool events flow through MCP side-channel. + * - MCP injection via pre-launch `gemini mcp add` command. 
+ */ +export const AGENT_PROGRAMS: Record = { + claude: { + tool: "claude", + command: "claude", + oneShotFlags: ["-p", "--output-format", "stream-json", "--verbose"], + inputMethod: "direct", + mcpMode: "config-file", + outputFormat: "stream-json", + readyMarker: "\u276F", // ❯ + }, + codex: { + tool: "codex", + command: "codex", + oneShotFlags: ["exec", "--json"], + inputMethod: "direct", + mcpMode: "config-args", + outputFormat: "codex-json", + readyMarker: "\u203A", // › + }, + gemini: { + tool: "gemini", + command: "gemini", + oneShotFlags: ["-p"], + inputMethod: "direct", + mcpMode: "mcp-add", + outputFormat: "plain", + readyMarker: "Type your message", + }, +}; + +/** + * Resolve the registry entry for a given tool name. + * Returns `undefined` for unknown tools so the caller can surface a clear + * error rather than getting a partially-constructed program. + */ +export function resolveAgentProgram(tool: AgentTool | undefined): AgentProgram | undefined { + if (!tool) return undefined; + return AGENT_PROGRAMS[tool]; +} diff --git a/packages/core/src/agent-client/spawn-helpers.ts b/packages/core/src/agent-client/spawn-helpers.ts new file mode 100644 index 0000000..566e30e --- /dev/null +++ b/packages/core/src/agent-client/spawn-helpers.ts @@ -0,0 +1,157 @@ +/** + * Internal spawn + stream helpers shared by claude.ts / codex.ts / gemini.ts. + * + * We intentionally avoid `spawnManagedProcess` from `../process.ts` here — + * that helper is intended for long-lived adapters, logs every spawn, and + * uses process-group semantics. The agent client needs a much lighter + * "spawn, read lines, kill on abort, done" primitive. + */ + +import { spawn, type ChildProcessWithoutNullStreams, type SpawnOptionsWithoutStdio } from "node:child_process"; +import { createInterface } from "node:readline"; +import type { AgentChunk, AgentSendOptions } from "./types.js"; + +/** + * Inject for tests — swap the child_process `spawn` implementation without + * touching global state. 
Tests pass a fake that returns a stub `ChildProcess` + * with programmable stdout / exit. + */ +export type SpawnFn = typeof spawn; + +/** + * Spawn the agent binary and return an async iterable of parsed chunks. + * + * - `parse(line)` converts each stdout line into `AgentChunk | null`. + * - On child close, emits a terminal `{ type: "done" }` chunk unless the + * parser already emitted one. + * - Honors `opts.signal` and `opts.timeoutMs`. + */ +export async function* runAgentProcess(params: { + command: string; + args: string[]; + env?: NodeJS.ProcessEnv; + cwd?: string; + stdinPayload?: string; // prompt written to stdin (codex) + parse: (line: string) => AgentChunk | null; + opts?: AgentSendOptions; + spawnFn?: SpawnFn; +}): AsyncGenerator { + const spawnImpl = params.spawnFn ?? spawn; + const spawnOpts: SpawnOptionsWithoutStdio = { + cwd: params.cwd, + env: params.env ?? process.env, + windowsHide: true, + // Intentionally no `shell: true` — we're passing args through as-is. + }; + + const child = spawnImpl(params.command, params.args, spawnOpts) as ChildProcessWithoutNullStreams; + + // ── Abort + timeout plumbing ── + const abortHandler = () => { + try { + child.kill("SIGTERM"); + } catch { + /* already dead */ + } + }; + if (params.opts?.signal) { + if (params.opts.signal.aborted) abortHandler(); + else params.opts.signal.addEventListener("abort", abortHandler, { once: true }); + } + + let timeoutHandle: NodeJS.Timeout | undefined; + let timedOut = false; + if (params.opts?.timeoutMs && params.opts.timeoutMs > 0) { + timeoutHandle = setTimeout(() => { + timedOut = true; + abortHandler(); + }, params.opts.timeoutMs); + } + + // ── Deliver prompt via stdin if configured ── + if (params.stdinPayload !== undefined && child.stdin) { + try { + child.stdin.end(params.stdinPayload); + } catch { + /* ignore — child may have died */ + } + } else if (child.stdin) { + // close stdin so the child doesn't hang waiting for input + try { + child.stdin.end(); + } catch { + 
/* ignore */ + } + } + + // ── Buffer + forward chunks ── + const queue: AgentChunk[] = []; + let resolveWaiter: (() => void) | null = null; + let finished = false; + let spawnError: Error | null = null; + let emittedDone = false; + + const push = (chunk: AgentChunk) => { + if (chunk.type === "done") emittedDone = true; + queue.push(chunk); + resolveWaiter?.(); + resolveWaiter = null; + }; + + const stdoutRl = createInterface({ input: child.stdout }); + stdoutRl.on("line", (line) => { + try { + const chunk = params.parse(line); + if (chunk) push(chunk); + } catch (err) { + push({ type: "error", message: (err as Error).message }); + } + }); + + // Capture stderr as soft errors so callers can surface them. + let stderrBuf = ""; + child.stderr.on("data", (b: Buffer) => { + stderrBuf += b.toString("utf8"); + }); + + child.on("error", (err) => { + spawnError = err; + }); + + child.on("close", (code) => { + if (timeoutHandle) clearTimeout(timeoutHandle); + if (!emittedDone) { + if (spawnError) { + push({ type: "error", message: spawnError.message }); + push({ type: "done", reason: "error" }); + } else if (timedOut) { + push({ type: "error", message: "agent process timed out" }); + push({ type: "done", reason: "error" }); + } else if (code === 0) { + push({ type: "done", reason: "end_turn" }); + } else { + if (stderrBuf.trim()) { + push({ type: "error", message: stderrBuf.trim().slice(0, 2000) }); + } + push({ type: "done", reason: code === null ? 
"stop" : "error" }); + } + } + finished = true; + resolveWaiter?.(); + resolveWaiter = null; + }); + + // ── Consumer loop ── + while (true) { + if (queue.length > 0) { + const next = queue.shift() as AgentChunk; + yield next; + if (next.type === "done") return; + continue; + } + if (finished) return; + await new Promise((r) => { + resolveWaiter = r; + }); + } +} diff --git a/packages/core/src/agent-client/stream-parsers.ts b/packages/core/src/agent-client/stream-parsers.ts new file mode 100644 index 0000000..6f614bb --- /dev/null +++ b/packages/core/src/agent-client/stream-parsers.ts @@ -0,0 +1,246 @@ +/** + * Stream parsers — convert a single line of agent stdout into an `AgentChunk`. + * + * Each parser is pure: (line) -> AgentChunk | null. The caller (the per-agent + * client) is responsible for buffering partial lines via `readline` and for + * emitting the terminal `{ type: "done" }` chunk when the child process exits. + * + * We tolerate unknown shapes by returning `null` — the client skips silently + * instead of crashing on an event the upstream CLI added in a newer version. + */ + +import type { AgentChunk } from "./types.js"; + +// ─── Claude (Anthropic SDK stream-json) ──────────────────────────────── + +/** + * Parse one line from `claude -p --output-format stream-json --verbose`. + * + * The Anthropic Messages streaming format wraps content blocks in events: + * - `message_start` — we ignore (no chunk to emit). + * - `content_block_start` — opens a text/tool_use/thinking block. + * - `content_block_delta` — text_delta / input_json_delta / thinking_delta. + * - `content_block_stop` — closes a block (ignored). + * - `message_delta` — carries stop_reason. + * - `message_stop` — end of turn. + * + * claude's CLI wraps this with its own envelope: + * { type: "assistant", message: { content: [...] } } (snapshot) + * { type: "assistant", event: { type: "content_block_delta", delta: {...} } } + * + * Both shapes are handled defensively. 
/**
 * Parse one line from `claude -p --output-format stream-json --verbose`.
 *
 * Handles both the bare Anthropic streaming events (`content_block_delta`,
 * `message_delta`, `message_stop`, ...) and claude's own envelope, where the
 * streaming event is nested under an `event` key.
 *
 * @param line - One line of the child's stdout (without the newline).
 * @returns The chunk for this line, or `null` when the line is blank or is an
 *   event that carries nothing to emit (including unknown event types).
 *   Anything that is not a JSON *object* — invalid JSON, or a bare JSON scalar
 *   / array / `null` — is surfaced as a text chunk rather than thrown on.
 */
export function parseClaudeStreamJson(line: string): AgentChunk | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  const obj = parseJsonObject(trimmed);
  // Not a JSON object — surface as text (claude sometimes emits preamble text).
  if (obj === null) return { type: "text", content: trimmed };

  // Unwrap claude's outer "event" envelope if present.
  const envelope = obj["event"];
  const event = isObject(envelope) ? envelope : obj;
  const evType = firstString(event, "type") ?? undefined;

  // Top-level result envelope that claude emits on completion.
  if (obj["type"] === "result" || evType === "result") {
    const reason = coerceStopReason(obj["stop_reason"] ?? event["stop_reason"]);
    return { type: "done", reason };
  }

  switch (evType) {
    case "message_start":
    case "content_block_start":
    case "content_block_stop":
    case "ping":
      return null;

    case "content_block_delta": {
      const delta = event["delta"];
      if (!isObject(delta)) return null;
      if (delta["type"] === "text_delta" && typeof delta["text"] === "string") {
        return { type: "text", content: delta["text"] };
      }
      if (delta["type"] === "thinking_delta" && typeof delta["thinking"] === "string") {
        return { type: "thinking", content: delta["thinking"] };
      }
      // `input_json_delta` is partial tool input — no complete chunk yet.
      // Any other delta type is unknown and skipped as well.
      return null;
    }

    case "tool_use": {
      const id = firstString(event, "id") ?? "";
      const name = firstString(event, "name") ?? "";
      return { type: "tool_call", id, tool: name, input: event["input"] ?? {} };
    }

    case "tool_result": {
      const id = firstString(event, "tool_use_id") ?? "";
      const isError = event["is_error"] === true;
      return { type: "tool_result", id, result: event["content"] ?? null, isError };
    }

    case "message_delta": {
      const delta = event["delta"];
      // Only emit done once we actually have a stop reason.
      if (!isObject(delta) || !delta["stop_reason"]) return null;
      return { type: "done", reason: coerceStopReason(delta["stop_reason"]) };
    }

    case "message_stop":
      return { type: "done", reason: "end_turn" };

    case "error": {
      const msg = firstString(event, "message") ?? "unknown error";
      return { type: "error", message: msg };
    }

    default:
      return null;
  }
}

// ─── Codex (`codex exec --json`) ───────────────────────────────────────

/**
 * Parse one line from `codex exec --json`.
 *
 * Codex emits heterogeneous event objects; this is a best-effort mapping based
 * on recognizable keys:
 *   - `{ kind: "message", role: "assistant", text }`
 *   - `{ kind: "delta", text }`
 *   - `{ kind: "tool_call", id, name, arguments }`
 *   - `{ kind: "tool_result", id, output, is_error }`
 *   - `{ kind: "done" | "finished" | "complete", reason }`
 *
 * The discriminator is read from `kind`, falling back to `type`.
 *
 * @param line - One line of the child's stdout (without the newline).
 * @returns The chunk for this line, or `null` for blank lines and unknown
 *   kinds. Anything that is not a JSON object is surfaced as a text chunk.
 */
export function parseCodexJson(line: string): AgentChunk | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  const obj = parseJsonObject(trimmed);
  if (obj === null) return { type: "text", content: trimmed };

  const kind = firstString(obj, "kind", "type") ?? "";

  switch (kind) {
    case "delta":
    case "token":
    case "text": {
      const text = firstString(obj, "text", "content");
      return text !== null ? { type: "text", content: text } : null;
    }

    case "message": {
      const text = firstString(obj, "text", "content");
      // Only assistant messages are forwarded; system/user echoes are skipped.
      if (obj["role"] === "assistant" && text !== null) {
        return { type: "text", content: text };
      }
      return null;
    }

    case "tool_call":
    case "function_call": {
      const id = firstString(obj, "id") ?? "";
      const name = firstString(obj, "name", "tool") ?? "";
      const input = obj["arguments"] ?? obj["input"] ?? {};
      return { type: "tool_call", id, tool: name, input };
    }

    case "tool_result":
    case "function_result": {
      const id = firstString(obj, "id") ?? "";
      const isError = obj["is_error"] === true || obj["isError"] === true;
      return { type: "tool_result", id, result: obj["output"] ?? obj["result"] ?? null, isError };
    }

    case "thinking":
    case "reasoning": {
      const text = firstString(obj, "text", "content") ?? "";
      return { type: "thinking", content: text };
    }

    case "done":
    case "finished":
    case "complete": {
      const reason = coerceStopReason(obj["reason"] ?? obj["stop_reason"]);
      return { type: "done", reason };
    }

    case "error": {
      const msg = firstString(obj, "message", "error") ?? "codex error";
      return { type: "error", message: msg };
    }

    default:
      return null;
  }
}

// ─── Gemini (plain text passthrough) ───────────────────────────────────

/**
 * Plain-text passthrough parser: every non-empty line becomes a text chunk.
 * This parser never emits a `done` chunk itself.
 *
 * @param line - One line of the child's stdout (without the newline).
 * @returns A text chunk carrying the line verbatim, or `null` for an empty line.
 */
export function parseGeminiPlain(line: string): AgentChunk | null {
  // Preserve the line exactly (including leading whitespace) — some tools
  // emit significant indentation — but drop fully empty lines.
  if (line.length === 0) return null;
  return { type: "text", content: line };
}

// ─── Helpers ───────────────────────────────────────────────────────────

/** True for plain (non-null, non-array) objects. */
function isObject(v: unknown): v is Record<string, unknown> {
  return typeof v === "object" && v !== null && !Array.isArray(v);
}

/**
 * Parse `text` as JSON and return it only if it is a JSON object.
 * Returns `null` for invalid JSON and for valid JSON that is a scalar, an
 * array, or `null`, so callers never index into a non-object.
 */
function parseJsonObject(text: string): Record<string, unknown> | null {
  try {
    const parsed: unknown = JSON.parse(text);
    return isObject(parsed) ? parsed : null;
  } catch {
    return null;
  }
}

/** Return the first of `keys` whose value on `obj` is a string, else `null`. */
function firstString(obj: Record<string, unknown>, ...keys: string[]): string | null {
  for (const key of keys) {
    const value = obj[key];
    if (typeof value === "string") return value;
  }
  return null;
}

/**
 * Map an arbitrary upstream stop reason onto the four reasons `done` carries.
 * Exact matches pass through; other strings are matched by substring
 * ("max" → `max_turns`, "error"/"fail" → `error`, "stop" → `stop`);
 * everything else, including non-strings, becomes `end_turn`.
 */
function coerceStopReason(v: unknown): "end_turn" | "max_turns" | "stop" | "error" {
  if (v === "end_turn" || v === "max_turns" || v === "stop" || v === "error") return v;
  if (typeof v === "string") {
    if (v.includes("max")) return "max_turns";
    if (v.includes("error") || v.includes("fail")) return "error";
    if (v.includes("stop")) return "stop";
  }
  return "end_turn";
}
/**
 * How a prompt is delivered to an agent process.
 *   - `direct`        — passed as a CLI argument or written to stdin in one-shot mode.
 *                       This is the only mode used in Phase 1.
 *   - `sendKeys`      — line-by-line stdin write (future TUI mode, persistent session).
 *   - `pasteFromFile` — write prompt to a temp file, send `/paste <path>` (future).
 */
export type InputMethod = "direct" | "sendKeys" | "pasteFromFile";

/**
 * How MCP servers are injected into an agent at launch time.
 * Mirrors Agent-Forge's `agents.json` `mcpMode` field.
 *
 *   - `config-file` — write a JSON file, pass via `--mcp-config <file>` (Claude).
 *   - `mcp-add`     — run `<cli> mcp add <name> --scope project <command> [args]`
 *                     before launch (Gemini).
 *   - `config-args` — pass `-c mcp.servers.<name>.<field>=<toml>` CLI args
 *                     per server/field (Codex).
 */
export type McpConfigMode = "config-file" | "mcp-add" | "config-args";

/** Definition of a single MCP server to inject into the agent. */
export interface McpServerDef {
  /** Executable that starts the server. */
  command: string;
  /** Arguments passed to `command`. */
  args?: string[];
  /** Extra environment variables for the server process. */
  env?: Record<string, string>;
}

/** An MCP injection payload — the mode plus the set of servers to register. */
export interface McpConfigInjection {
  mode: McpConfigMode;
  /** Servers keyed by the name they are registered under. */
  servers: Record<string, McpServerDef>;
}

/**
 * Structured events streamed back from an agent.
 *
 * `text` / `thinking` / `tool_call` / `tool_result` mirror Anthropic's content
 * block taxonomy; `error` is a soft failure surfaced to the caller; `done`
 * is the terminator carrying the stop reason.
 */
export type AgentChunk =
  | { type: "text"; content: string }
  | { type: "tool_call"; id: string; tool: string; input: unknown }
  | { type: "tool_result"; id: string; result: unknown; isError?: boolean }
  | { type: "thinking"; content: string }
  | { type: "error"; message: string }
  | { type: "done"; reason: "end_turn" | "max_turns" | "stop" | "error" };

/** Options accepted by `AgentClient.send`. */
export interface AgentSendOptions {
  /** MCP server(s) to inject at launch. */
  mcpConfig?: McpConfigInjection;
  /**
   * System instructions prepended to the prompt. Claude's one-shot `-p` mode
   * has no dedicated system-prompt flag, so we synthesize one by wrapping:
   *
   *     System:
   *     <instructions>
   *
   *     User:
   *     <prompt>
   *
   * Gemini and Codex follow the same wrapping for consistency.
   */
  instructions?: string;
  /** Abort signal — cancellation triggers SIGTERM on the child. */
  signal?: AbortSignal;
  /** Optional hard timeout (ms). When elapsed, the child is killed. */
  timeoutMs?: number;
}

/**
 * One-shot agent client. `send` returns an async iterable that yields
 * `AgentChunk`s until the child process exits.
 */
export interface AgentClient {
  send(prompt: string, opts?: AgentSendOptions): AsyncIterable<AgentChunk>;
  shutdown(): Promise<void>;
}

/** Output format dialect we know how to parse. */
export type AgentOutputFormat = "stream-json" | "plain" | "codex-json";

/**
 * Registry entry describing how to launch one agent in one-shot mode.
 * Ported from Agent-Forge's `agents.json`.
 */
export interface AgentProgram {
  /** Binary name on PATH. */
  command: string;
  /** Flags that put the CLI into one-shot / non-TUI mode. */
  oneShotFlags: string[];
  /** How the prompt is delivered. Phase 1 only uses `direct`. */
  inputMethod: InputMethod;
  /** MCP injection dialect. */
  mcpMode: McpConfigMode;
  /** Optional TUI readiness marker (unused in one-shot mode; kept for future). */
  readyMarker?: string;
  /** Output dialect on stdout. */
  outputFormat: AgentOutputFormat;
  /** Tool name for dispatcher lookup. */
  tool: AgentTool;
}
*/ + tool: AgentTool; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index dadb1f8..ccaf2a2 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -6,6 +6,7 @@ export * from "./import-utils.js"; export * from "./keyring.js"; export * from "./secrets/index.js"; export * from "./history.js"; +export * from "./agent-client/index.js"; export * from "./lifecycle.js"; export * from "./logging.js"; export * from "./paths.js"; diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 83cc17d..e5b7865 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -56,6 +56,14 @@ export interface Profile { instructionsFile?: string; /** OpenAI-compatible provider configuration for custom endpoints. */ provider?: ProviderConfig; + /** + * Launch mode for this profile. + * - `native` (default): full TTY handoff via spawnSync with inherited stdio. ARC exits, + * the tool paints its own TUI (e.g. Claude's statusLine). Use for daily interactive work. + * - `worker`: run under ARC supervision via the adapter's managed lifecycle — stdout is + * captured for monitoring. Use for orchestration, roundtable, and programmatic flows. + */ + launchMode?: "native" | "worker"; } export interface ArcSettings { diff --git a/tests/integration/profile-inheritance.test.ts b/tests/integration/profile-inheritance.test.ts index 954bf2a..964e169 100644 --- a/tests/integration/profile-inheritance.test.ts +++ b/tests/integration/profile-inheritance.test.ts @@ -261,7 +261,9 @@ describe("Profile Inheritance – Launch Pipeline", () => { }) as any); try { - await launchModule.handleLaunch("child", ["child"]); + // Force worker mode so adapter.launch() is invoked (native mode bypasses the adapter + // and goes straight to spawnSync, which wouldn't exercise inheritance resolution). 
+ await launchModule.handleLaunch("child", ["child"], { launchMode: "worker" }); } catch (e: any) { // Expected: legacy spawnSync path calls process.exit if (!e.message.includes("process.exit")) throw e; diff --git a/tests/unit/agent-client/dispatch.test.ts b/tests/unit/agent-client/dispatch.test.ts new file mode 100644 index 0000000..f7b5b12 --- /dev/null +++ b/tests/unit/agent-client/dispatch.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect } from "vitest"; +import { + getAgentClientForProfile, + buildProfileEnv, +} from "../../../packages/core/src/agent-client/dispatch.js"; +import { ClaudeAgentClient } from "../../../packages/core/src/agent-client/claude.js"; +import { CodexAgentClient } from "../../../packages/core/src/agent-client/codex.js"; +import { GeminiAgentClient } from "../../../packages/core/src/agent-client/gemini.js"; +import type { Profile } from "../../../packages/core/src/types.js"; + +// TODO Phase 1 smoke: integration tests requiring real claude/codex/gemini in CI-skipped mode + +function makeProfile(tool: string | undefined, overrides: Partial = {}): Profile { + return { + authType: "api-key", + tool, + configDir: "/tmp/arc-test-profile", + createdAt: new Date().toISOString(), + ...overrides, + }; +} + +describe("getAgentClientForProfile", () => { + it("returns a ClaudeAgentClient for claude profiles", () => { + const client = getAgentClientForProfile(makeProfile("claude")); + expect(client).toBeInstanceOf(ClaudeAgentClient); + }); + + it("returns a CodexAgentClient for codex profiles", () => { + const client = getAgentClientForProfile(makeProfile("codex")); + expect(client).toBeInstanceOf(CodexAgentClient); + }); + + it("returns a GeminiAgentClient for gemini profiles", () => { + const client = getAgentClientForProfile(makeProfile("gemini")); + expect(client).toBeInstanceOf(GeminiAgentClient); + }); + + it("throws on unsupported tool (hermes)", () => { + expect(() => getAgentClientForProfile(makeProfile("hermes"))).toThrow( + /No 
AgentClient implementation/, + ); + }); + + it("throws on unsupported tool (openclaw)", () => { + expect(() => getAgentClientForProfile(makeProfile("openclaw"))).toThrow( + /No AgentClient implementation/, + ); + }); + + it("throws on missing tool", () => { + expect(() => getAgentClientForProfile(makeProfile(undefined))).toThrow( + /no tool set/, + ); + }); +}); + +describe("buildProfileEnv", () => { + it("merges process.env + envOverrides + ARC_CONFIG_DIR", () => { + const profile = makeProfile("claude", { + configDir: "/custom/dir", + envOverrides: { FOO: "bar" }, + }); + const env = buildProfileEnv(profile); + expect(env["FOO"]).toBe("bar"); + expect(env["ARC_CONFIG_DIR"]).toBe("/custom/dir"); + // Inherit PATH from process.env (sanity: not undefined) + expect(env["PATH"]).toBeDefined(); + }); + + it("applies extraEnv last, overriding profile overrides", () => { + const profile = makeProfile("claude", { envOverrides: { FOO: "bar" } }); + const env = buildProfileEnv(profile, { FOO: "baz" }); + expect(env["FOO"]).toBe("baz"); + }); +}); diff --git a/tests/unit/agent-client/mcp-injection.test.ts b/tests/unit/agent-client/mcp-injection.test.ts new file mode 100644 index 0000000..613f677 --- /dev/null +++ b/tests/unit/agent-client/mcp-injection.test.ts @@ -0,0 +1,114 @@ +import { describe, it, expect, afterEach } from "vitest"; +import { existsSync, readFileSync } from "node:fs"; +import { + writeMcpConfigFile, + buildMcpConfigArgs, + cleanupMcpTempFiles, + __peekTempFiles, +} from "../../../packages/core/src/agent-client/mcp-injection.js"; +import type { McpConfigInjection } from "../../../packages/core/src/agent-client/types.js"; + +// TODO Phase 1 smoke: integration tests requiring real claude/codex/gemini in CI-skipped mode + +afterEach(() => { + cleanupMcpTempFiles(); +}); + +describe("writeMcpConfigFile", () => { + it("writes { mcpServers: {...} } JSON to a temp file", () => { + const injection: McpConfigInjection = { + mode: "config-file", + servers: { + arc: 
{ + command: "node", + args: ["server.mjs"], + env: { TOKEN: "xyz" }, + }, + }, + }; + const file = writeMcpConfigFile(injection); + expect(existsSync(file)).toBe(true); + + const parsed = JSON.parse(readFileSync(file, "utf8")); + expect(parsed).toEqual({ + mcpServers: { + arc: { + command: "node", + args: ["server.mjs"], + env: { TOKEN: "xyz" }, + }, + }, + }); + }); + + it("tracks temp files so cleanup can remove them", () => { + const injection: McpConfigInjection = { + mode: "config-file", + servers: { arc: { command: "node" } }, + }; + const file = writeMcpConfigFile(injection); + expect(__peekTempFiles().files).toContain(file); + cleanupMcpTempFiles(); + expect(existsSync(file)).toBe(false); + expect(__peekTempFiles().files).toHaveLength(0); + }); + + it("rejects the wrong mode", () => { + expect(() => + writeMcpConfigFile({ mode: "config-args", servers: {} } as McpConfigInjection), + ).toThrow(/config-file/); + }); +}); + +describe("buildMcpConfigArgs", () => { + it("emits -c flags with TOML literals for command/args/env", () => { + const injection: McpConfigInjection = { + mode: "config-args", + servers: { + arc: { + command: "node", + args: ["/path/to/server.mjs"], + env: { TOKEN: "abc", NAME: "bob's server" }, + }, + }, + }; + const args = buildMcpConfigArgs(injection); + + // Flat pairs of ["-c", "expr"] + expect(args[0]).toBe("-c"); + expect(args[1]).toBe("mcp.servers.arc.command='node'"); + expect(args[2]).toBe("-c"); + expect(args[3]).toBe("mcp.servers.arc.args=['/path/to/server.mjs']"); + + // Env entries follow — order not guaranteed but each comes as a -c pair + const env1Idx = args.indexOf("mcp.servers.arc.env.TOKEN='abc'"); + expect(env1Idx).toBeGreaterThan(0); + expect(args[env1Idx - 1]).toBe("-c"); + + const env2Idx = args.indexOf("mcp.servers.arc.env.NAME='bob''s server'"); + expect(env2Idx).toBeGreaterThan(0); + }); + + it("handles servers with no args / env", () => { + const injection: McpConfigInjection = { + mode: "config-args", + servers: 
{ bare: { command: "echo" } }, + }; + const args = buildMcpConfigArgs(injection); + expect(args).toEqual(["-c", "mcp.servers.bare.command='echo'"]); + }); + + it("rejects the wrong mode", () => { + expect(() => + buildMcpConfigArgs({ mode: "config-file", servers: {} } as McpConfigInjection), + ).toThrow(/config-args/); + }); +}); + +describe("cleanupMcpTempFiles", () => { + it("is idempotent (safe to call with nothing tracked)", () => { + cleanupMcpTempFiles(); + cleanupMcpTempFiles(); + expect(__peekTempFiles().files).toHaveLength(0); + }); +}); diff --git a/tests/unit/agent-client/registry.test.ts b/tests/unit/agent-client/registry.test.ts new file mode 100644 index 0000000..673bda6 --- /dev/null +++ b/tests/unit/agent-client/registry.test.ts @@ -0,0 +1,53 @@ +import { describe, it, expect } from "vitest"; +import { + AGENT_PROGRAMS, + resolveAgentProgram, +} from "../../../packages/core/src/agent-client/registry.js"; + +describe("AGENT_PROGRAMS", () => { + it("has claude entry with stream-json output and --mcp-config mode", () => { + const p = AGENT_PROGRAMS["claude"]; + expect(p).toBeDefined(); + expect(p?.command).toBe("claude"); + expect(p?.oneShotFlags).toEqual(["-p", "--output-format", "stream-json", "--verbose"]); + expect(p?.inputMethod).toBe("direct"); + expect(p?.mcpMode).toBe("config-file"); + expect(p?.outputFormat).toBe("stream-json"); + }); + + it("has codex entry with exec --json and config-args mcp mode", () => { + const p = AGENT_PROGRAMS["codex"]; + expect(p).toBeDefined(); + expect(p?.command).toBe("codex"); + expect(p?.oneShotFlags).toEqual(["exec", "--json"]); + expect(p?.mcpMode).toBe("config-args"); + expect(p?.outputFormat).toBe("codex-json"); + }); + + it("has gemini entry with -p plain output and mcp-add injection", () => { + const p = AGENT_PROGRAMS["gemini"]; + expect(p).toBeDefined(); + expect(p?.command).toBe("gemini"); + expect(p?.oneShotFlags).toEqual(["-p"]); + expect(p?.mcpMode).toBe("mcp-add"); + 
expect(p?.outputFormat).toBe("plain"); + }); +}); + +describe("resolveAgentProgram", () => { + it("returns the entry for known tools", () => { + expect(resolveAgentProgram("claude")?.command).toBe("claude"); + expect(resolveAgentProgram("codex")?.command).toBe("codex"); + expect(resolveAgentProgram("gemini")?.command).toBe("gemini"); + }); + + it("returns undefined for unknown tools", () => { + expect(resolveAgentProgram("nope")).toBeUndefined(); + expect(resolveAgentProgram("openclaw")).toBeUndefined(); + expect(resolveAgentProgram("hermes")).toBeUndefined(); + }); + + it("returns undefined for missing tool", () => { + expect(resolveAgentProgram(undefined)).toBeUndefined(); + }); +}); diff --git a/tests/unit/agent-client/stream-parsers.test.ts b/tests/unit/agent-client/stream-parsers.test.ts new file mode 100644 index 0000000..c871f75 --- /dev/null +++ b/tests/unit/agent-client/stream-parsers.test.ts @@ -0,0 +1,209 @@ +import { describe, it, expect } from "vitest"; +import { + parseClaudeStreamJson, + parseCodexJson, + parseGeminiPlain, +} from "../../../packages/core/src/agent-client/stream-parsers.js"; + +// TODO Phase 1 smoke: integration tests requiring real claude/codex/gemini in CI-skipped mode + +describe("parseClaudeStreamJson", () => { + it("returns null on empty line", () => { + expect(parseClaudeStreamJson("")).toBeNull(); + expect(parseClaudeStreamJson(" ")).toBeNull(); + }); + + it("emits text chunk from content_block_delta text_delta", () => { + const line = JSON.stringify({ + type: "content_block_delta", + delta: { type: "text_delta", text: "Hello" }, + }); + expect(parseClaudeStreamJson(line)).toEqual({ type: "text", content: "Hello" }); + }); + + it("unwraps event envelope and emits text chunk", () => { + const line = JSON.stringify({ + type: "assistant", + event: { + type: "content_block_delta", + delta: { type: "text_delta", text: " world" }, + }, + }); + expect(parseClaudeStreamJson(line)).toEqual({ type: "text", content: " world" }); + }); + + 
it("emits thinking chunk from thinking_delta", () => { + const line = JSON.stringify({ + type: "content_block_delta", + delta: { type: "thinking_delta", thinking: "pondering" }, + }); + expect(parseClaudeStreamJson(line)).toEqual({ type: "thinking", content: "pondering" }); + }); + + it("returns null for message_start / content_block_start / ping", () => { + expect(parseClaudeStreamJson(JSON.stringify({ type: "message_start" }))).toBeNull(); + expect(parseClaudeStreamJson(JSON.stringify({ type: "content_block_start" }))).toBeNull(); + expect(parseClaudeStreamJson(JSON.stringify({ type: "ping" }))).toBeNull(); + expect(parseClaudeStreamJson(JSON.stringify({ type: "content_block_stop" }))).toBeNull(); + }); + + it("emits tool_call from tool_use event", () => { + const line = JSON.stringify({ + type: "tool_use", + id: "tool_123", + name: "list_profiles", + input: { filter: "all" }, + }); + expect(parseClaudeStreamJson(line)).toEqual({ + type: "tool_call", + id: "tool_123", + tool: "list_profiles", + input: { filter: "all" }, + }); + }); + + it("emits tool_result from tool_result event with isError flag", () => { + const line = JSON.stringify({ + type: "tool_result", + tool_use_id: "tool_123", + content: "ok", + is_error: true, + }); + expect(parseClaudeStreamJson(line)).toEqual({ + type: "tool_result", + id: "tool_123", + result: "ok", + isError: true, + }); + }); + + it("emits done on message_stop", () => { + expect(parseClaudeStreamJson(JSON.stringify({ type: "message_stop" }))).toEqual({ + type: "done", + reason: "end_turn", + }); + }); + + it("emits done with max_turns from message_delta", () => { + const line = JSON.stringify({ + type: "message_delta", + delta: { stop_reason: "max_tokens" }, + }); + expect(parseClaudeStreamJson(line)).toEqual({ type: "done", reason: "max_turns" }); + }); + + it("emits error event as error chunk", () => { + const line = JSON.stringify({ type: "error", message: "rate limited" }); + expect(parseClaudeStreamJson(line)).toEqual({ 
type: "error", message: "rate limited" }); + }); + + it("returns null for unknown event types", () => { + expect(parseClaudeStreamJson(JSON.stringify({ type: "totally_new_event" }))).toBeNull(); + }); + + it("falls back to text chunk on non-JSON input", () => { + expect(parseClaudeStreamJson("preamble line")).toEqual({ type: "text", content: "preamble line" }); + }); +}); + +describe("parseCodexJson", () => { + it("emits text from delta events", () => { + expect(parseCodexJson(JSON.stringify({ kind: "delta", text: "hi" }))).toEqual({ + type: "text", + content: "hi", + }); + }); + + it("accepts 'type' discriminator as well as 'kind'", () => { + expect(parseCodexJson(JSON.stringify({ type: "delta", text: "hi" }))).toEqual({ + type: "text", + content: "hi", + }); + }); + + it("emits text from assistant message event", () => { + expect( + parseCodexJson(JSON.stringify({ kind: "message", role: "assistant", text: "answer" })), + ).toEqual({ type: "text", content: "answer" }); + }); + + it("ignores non-assistant messages", () => { + expect( + parseCodexJson(JSON.stringify({ kind: "message", role: "system", text: "sys" })), + ).toBeNull(); + }); + + it("emits tool_call from tool_call / function_call events", () => { + expect( + parseCodexJson( + JSON.stringify({ kind: "tool_call", id: "t1", name: "run", arguments: { a: 1 } }), + ), + ).toEqual({ type: "tool_call", id: "t1", tool: "run", input: { a: 1 } }); + + expect( + parseCodexJson( + JSON.stringify({ kind: "function_call", id: "t2", tool: "fn", input: { b: 2 } }), + ), + ).toEqual({ type: "tool_call", id: "t2", tool: "fn", input: { b: 2 } }); + }); + + it("emits tool_result with is_error → isError mapping", () => { + expect( + parseCodexJson( + JSON.stringify({ kind: "tool_result", id: "t1", output: "done", is_error: false }), + ), + ).toEqual({ type: "tool_result", id: "t1", result: "done", isError: false }); + }); + + it("emits thinking chunk", () => { + expect(parseCodexJson(JSON.stringify({ kind: "thinking", text: 
"reason" }))).toEqual({ + type: "thinking", + content: "reason", + }); + }); + + it("emits done on finished / done / complete", () => { + expect(parseCodexJson(JSON.stringify({ kind: "done" }))).toEqual({ + type: "done", + reason: "end_turn", + }); + expect(parseCodexJson(JSON.stringify({ kind: "finished", reason: "stop" }))).toEqual({ + type: "done", + reason: "stop", + }); + }); + + it("emits error chunk with message", () => { + expect(parseCodexJson(JSON.stringify({ kind: "error", message: "boom" }))).toEqual({ + type: "error", + message: "boom", + }); + }); + + it("returns null on empty line", () => { + expect(parseCodexJson("")).toBeNull(); + expect(parseCodexJson(" ")).toBeNull(); + }); + + it("falls back to text chunk on non-JSON input", () => { + expect(parseCodexJson("raw text")).toEqual({ type: "text", content: "raw text" }); + }); + + it("returns null for unknown kinds", () => { + expect(parseCodexJson(JSON.stringify({ kind: "some_future_event" }))).toBeNull(); + }); +}); + +describe("parseGeminiPlain", () => { + it("passes through non-empty lines as text", () => { + expect(parseGeminiPlain("hello")).toEqual({ type: "text", content: "hello" }); + }); + + it("preserves leading whitespace", () => { + expect(parseGeminiPlain(" indented")).toEqual({ type: "text", content: " indented" }); + }); + + it("returns null on empty line", () => { + expect(parseGeminiPlain("")).toBeNull(); + }); +}); diff --git a/user-docs/guide/profiles.md b/user-docs/guide/profiles.md index 2572e40..0ad26c2 100644 --- a/user-docs/guide/profiles.md +++ b/user-docs/guide/profiles.md @@ -75,6 +75,48 @@ When a profile with `inherits` is launched, ARC runs `resolveProfile()` to merge 1. Base profile settings (tool, auth, env, launch args) 2. Child profile overrides (anything explicitly set) +## Launch Modes + +Each profile can run its underlying tool in one of two modes, selectable per-profile or per-launch. + +### Native (default) + +Full TTY handoff. 
ARC spawns the tool with inherited stdio and exits, letting the tool paint its own TUI — Claude's `statusLine`, Gemini's ANSI chrome, Codex's REPL, etc. This is the right mode for daily interactive work. + +```bash +arc launch work # uses profile's launchMode (default: native) +arc launch work --native # force native mode for this launch +``` + +In native mode ARC does no process supervision, no stdout capture, and no stream parsing. You get the tool's native experience; ARC just handled env isolation, credentials, and hook pre-launch checks. + +### Worker + +Run the tool under ARC supervision via the adapter's managed lifecycle. Stdout is captured and parsed so ARC can feed orchestrators (roundtable, PLAN/EXEC/VERIFY pipelines, dashboard chat). This **suppresses the tool's native TUI chrome** — it's meant for programmatic use, not direct interactive sessions. + +```bash +arc launch work --worker # one-off worker mode +``` + +To make worker mode the default for a profile, either set `launchMode: "worker"` in `~/.arc/config.json` or press `m` on the profile in the TUI Profiles view to toggle. + +### When to use each + +| Scenario | Mode | +|---|---| +| Daily coding with Claude / Gemini / Codex | **native** | +| Running a roundtable or multi-agent pipeline | **worker** (orchestrator forces this) | +| Dashboard AI chat / programmatic prompts | **worker** | +| CI / headless automation | **worker** | +| Debugging the tool's own TUI (statusLine, theming) | **native** | + +### Resolution order + +1. `opts.launchMode` passed by an orchestrator (always wins) +2. `--native` / `--worker` CLI flag +3. `profile.launchMode` in `~/.arc/config.json` +4. 
Default: `native` + ## Workspace Selection ARC supports per-repository profile auto-selection via `arc.json` in the project root: From f84b459d7f2367d4ada40dbe5d29e69cba94797a Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sat, 18 Apr 2026 08:38:25 -0400 Subject: [PATCH 06/17] docs: Mark Phase 0, 0.5, 1 complete in plan Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/plans/ai-and-roundtable.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/plans/ai-and-roundtable.md b/docs/plans/ai-and-roundtable.md index 86a9d82..96d0363 100644 --- a/docs/plans/ai-and-roundtable.md +++ b/docs/plans/ai-and-roundtable.md @@ -391,13 +391,13 @@ See **Decisions** section at the top of this doc. Update checkboxes in-place as phases complete. Add a `Completed YYYY-MM-DD` marker at the bottom of each phase. ### Phase 0 — Scaffolding -- [ ] Not started +- [x] **Completed 2026-04-18** — folded into Phase 1 commit `6ff876b` ### Phase 0.5 — Launch hygiene (native vs orchestrated) -- [ ] Not started +- [x] **Completed 2026-04-18** — commit `6ff876b`. `launchMode` field, `--native`/`--worker` flags, doctor check, `m` toggle in ProfilesView, docs section in user-docs/guide/profiles.md ### Phase 1 — Agent client (CLI-spawn) foundation -- [ ] Not started +- [x] **Completed 2026-04-18** — commit `6ff876b`. `packages/core/src/agent-client/` with Claude/Codex/Gemini clients, MCP injection per mcpMode variant, stream parsers, 48 unit tests. Unverified CLI flags flagged for Phase 4 smoke test. 
### Phase 2 — Tool registry + agent loop - [ ] Not started From 443a78caec2e61ef4309063909debdb1267290c4 Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sat, 18 Apr 2026 09:26:06 -0400 Subject: [PATCH 07/17] =?UTF-8?q?feat:=20bare=20launch,=20tool=20registry,?= =?UTF-8?q?=20knowledge=20layer,=20docs=20(streams=20=CE=B1/=CE=B2/=CE=B3/?= =?UTF-8?q?=CE=B4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four streams merged via parallel agent worktrees. Stream α — Phase 0.7: Bare launch + clearable active profile - ArcConfig.activeProfile: string | null (new installs start null) - arc run and arc launch --bare skip ARC overlay entirely - Tool-name inference: arc launch claude with no matching profile infers bare - arc profile switch none / arc profile clear-active clear the active pointer - TUI: x key clears active in ProfilesView; Dash/Session empty-state copy - Doctor handles null activeProfile gracefully - New unit tests: null-active-profile.test.ts Stream β — Phase 2: Tool registry + agent loop - packages/core/src/agent/: Tool/ToolRegistry/PermissionMode/runAgent - 16 ARC tools wired to core fns: 11 read, 4 write, 1 dangerous (list_profiles, clone_profile, switch_active_profile, delete_profile, ...) 
- Three permission modes (read-only / supervised / autonomous) with confirm callback for writes in supervised mode - zod added as first runtime dep of core for tool schema validation - 43 new unit tests Stream γ — Phase 3: Knowledge endowment - packages/core/src/knowledge/: static ARC catalog (architecture, 52 command entries across 6 categories, 16-term glossary) - FEATURES_INDEX: 33 curated entries with status/summary/links - buildSystemPrompt() composes 6-section prompt under 4K tokens (identity / capabilities / architecture / glossary / live state / behavior rules per permission mode); 5135 chars ~1284 tokens typical - 27 new unit tests Stream δ — Docs + user-site - user-docs: launch modes section, launch-without-profile section, getting-started fast vs full path, architecture overview of agent-client/agent/knowledge modules, configuration reference - FEATURES.md: shipped items checked, roadmap entries added for Phases 2-8 with plan doc references - CLAUDE.md: architecture bullets for all three new core modules, launch modes, bare launch - README: quickstart split into fast path (arc run) and full path - site/: Features.tsx copy updated for native vs worker + bare Merge conflict resolutions: cli.ts (combined --native/--worker/--bare flags and run command), launch.ts (both launchMode and bare options in LaunchOptions), ProfilesView.tsx (both m-toggle and x-clear keybinds), CLAUDE.md (preserved existing bullets and added new ones). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 5 + FEATURES.md | 10 + README.md | 21 +- package.json | 3 +- packages/cli/src/cli.ts | 68 ++- packages/cli/src/commands/auth.ts | 5 + packages/cli/src/commands/doctor.ts | 15 + packages/cli/src/commands/exec.ts | 7 +- packages/cli/src/commands/instructions.ts | 4 + packages/cli/src/commands/launch.ts | 127 ++++- packages/cli/src/commands/profile.ts | 36 +- packages/cli/src/commands/provider.ts | 4 + packages/cli/src/commands/resolve.ts | 3 + packages/cli/src/commands/run.ts | 22 + packages/cli/src/display.ts | 14 + packages/cli/src/services/health.ts | 9 +- packages/cli/src/tui/views/DashView.tsx | 8 +- packages/cli/src/tui/views/ProfilesView.tsx | 25 +- packages/cli/src/tui/views/SessionView.tsx | 15 +- packages/core/package.json | 3 + packages/core/src/agent/arc-tools.ts | 411 ++++++++++++++++ packages/core/src/agent/index.ts | 25 + packages/core/src/agent/loop.ts | 117 +++++ packages/core/src/agent/permissions.ts | 39 ++ packages/core/src/agent/registry.ts | 226 +++++++++ packages/core/src/agent/types.ts | 139 ++++++ packages/core/src/config.ts | 23 +- packages/core/src/index.ts | 2 + packages/core/src/knowledge/feature-index.ts | 316 ++++++++++++ packages/core/src/knowledge/index.ts | 31 ++ packages/core/src/knowledge/runtime.ts | 195 ++++++++ packages/core/src/knowledge/static.ts | 485 +++++++++++++++++++ packages/core/src/types.ts | 9 +- pnpm-lock.yaml | 9 +- site/src/components/Features.tsx | 2 +- tests/integration/profile.test.ts | 3 +- tests/unit/agent/arc-tools.test.ts | 298 ++++++++++++ tests/unit/agent/loop.test.ts | 156 ++++++ tests/unit/agent/permissions.test.ts | 73 +++ tests/unit/agent/registry.test.ts | 208 ++++++++ tests/unit/cli-auth.test.ts | 16 +- tests/unit/knowledge/feature-index.test.ts | 76 +++ tests/unit/knowledge/runtime.test.ts | 190 ++++++++ tests/unit/knowledge/static.test.ts | 92 ++++ tests/unit/null-active-profile.test.ts | 185 +++++++ user-docs/architecture/index.md | 23 + 
user-docs/guide/getting-started.md | 18 + user-docs/guide/profiles.md | 53 ++ user-docs/reference/configuration.md | 3 + 49 files changed, 3785 insertions(+), 42 deletions(-) create mode 100644 packages/cli/src/commands/run.ts create mode 100644 packages/core/src/agent/arc-tools.ts create mode 100644 packages/core/src/agent/index.ts create mode 100644 packages/core/src/agent/loop.ts create mode 100644 packages/core/src/agent/permissions.ts create mode 100644 packages/core/src/agent/registry.ts create mode 100644 packages/core/src/agent/types.ts create mode 100644 packages/core/src/knowledge/feature-index.ts create mode 100644 packages/core/src/knowledge/index.ts create mode 100644 packages/core/src/knowledge/runtime.ts create mode 100644 packages/core/src/knowledge/static.ts create mode 100644 tests/unit/agent/arc-tools.test.ts create mode 100644 tests/unit/agent/loop.test.ts create mode 100644 tests/unit/agent/permissions.test.ts create mode 100644 tests/unit/agent/registry.test.ts create mode 100644 tests/unit/knowledge/feature-index.test.ts create mode 100644 tests/unit/knowledge/runtime.test.ts create mode 100644 tests/unit/knowledge/static.test.ts create mode 100644 tests/unit/null-active-profile.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index 3960a26..cfa120e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -33,6 +33,11 @@ ARC (Agent Runtime Control) is a CLI + TUI for managing multiple agent profiles - **Adapters:** Claude Code (SDK + plugin + hooks), Codex CLI, Gemini CLI, OpenClaw (native plugin), Hermes Agent (MCP bridge), OpenAI Compatible (custom providers), Generic (fallback for any tool) - **Agent instructions:** `instructions` / `instructionsFile` fields on Profile; resolved at launch, injected as `ARC_AGENT_INSTRUCTIONS` env var; `arc instructions` CLI for show/set/edit/clear - **Custom providers:** `openai-compat` auth type + `ProviderConfig` (baseUrl, model, apiKeyEnvVar) on Profile; 7 presets (OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, 
DeepSeek); `arc provider` CLI for set/show/clear/presets +- **Launch modes:** `launchMode: "native" | "worker"` on Profile (default `native`). Native uses full TTY handoff so the tool paints its own TUI; worker uses `spawnManagedProcess` for ARC-supervised orchestration. CLI flags `--native` / `--worker` override. TUI: `m` in ProfilesView toggles. Roundtable forces worker regardless. +- **Bare launch:** `arc run <tool>` and `arc launch --bare <tool>` skip ARC overlay entirely (no env injection, no hook pipeline). Tool-name inference falls through to bare when no matching profile exists. `activeProfile` may be `null` — cleared via `arc profile switch none` or `arc profile clear-active`, rendered as `(none)`. +- **Agent client (internal):** `packages/core/src/agent-client/` — CLI-spawn clients for Claude/Codex/Gemini with MCP config injection per `mcpMode` variant and per-tool stream parsers. Foundation for upcoming `arc chat` + roundtable orchestrator. See `docs/plans/ai-and-roundtable.md`. +- **Agent loop + tool registry (internal):** `packages/core/src/agent/` — tool registry with read-only/supervised/autonomous permission modes, agent loop for tool-use dispatch. +- **Knowledge (internal):** `packages/core/src/knowledge/` — static + runtime system prompt composition (ARC architecture, command reference, live state). ## Key Conventions diff --git a/FEATURES.md b/FEATURES.md index ed51e4d..6d8e85c 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -15,6 +15,8 @@ Tracking file for planned features, enhancements, and ideas.
Checked items are s - [x] **Workspace-aware profile auto-selection** — `arc.json` in repo root specifies preferred profile/tool; workspace overrides applied on launch (Phase 9) - [x] **Workspace shell syntax highlighting** — tokenized input with color-coded `/commands` (green), `@profiles` (blue), `#tags` (dimmed); invalid tokens show in red - [x] **Workspace shell auto-complete** — suggestion overlay for `/` commands and `@profile` mentions; Tab/Enter accepts, arrows navigate, Escape dismisses +- [x] **Launch modes (native / worker)** — `launchMode` field on Profile, `arc launch --native` / `--worker` CLI flags, `m` key toggle in ProfilesView, doctor check for deprecated `CLAUDE_CODE_NO_FLICKER` +- [x] **Bare launch / clearable active profile** — `arc run `, `arc launch --bare `, tool-name inference when no matching profile exists, `arc profile switch none` / `arc profile clear-active`, `activeProfile: null` renders as `(none)` - [ ] **Quick profile switch overlay** — global `Ctrl+S` or palette action that shows a focused profile picker from any view - [x] **Doctor repair actions** — inline install hints, re-auth instructions, and PATH/shell fix hints on actionable diagnostics - [ ] **Profile search/filter** — `/` search in Profiles view and queue for scaling to 10+ profiles @@ -32,6 +34,14 @@ Tracking file for planned features, enhancements, and ideas. Checked items are s - [ ] **Team/shared config** — repo-checked config with local secret overlays - [x] **Backup/export/import** — `arc backup create/restore/list` (gzipped archive of `~/.arc/`, credentials excluded by default) + `arc profile export` / `arc profile import-file` (single-profile JSON transport with inlined instructions) - [x] **Managed updates** — self-update system with npm registry check and TUI update banner +- [x] **Agent client foundation** — internal CLI-spawn agent client at `packages/core/src/agent-client/` (Claude/Codex/Gemini), MCP config injection per `mcpMode`, stream parsers. 
Building block for upcoming chat + roundtable orchestrator (see `docs/plans/ai-and-roundtable.md` Phase 1) +- [ ] **Tool registry + agent loop** — `packages/core/src/agent/` tool registry with read-only/supervised/autonomous permission modes; ARC tool set wired to existing handlers (list_profiles, clone_profile, configure_provider, etc.). Plan Phase 2 +- [ ] **Knowledge endowment** — `packages/core/src/knowledge/` system prompt composition (static architecture + command reference + runtime state). Plan Phase 3 +- [ ] **`arc chat` CLI** — terminal REPL using active profile's agent client, streaming output, permission-gated tool calls. Plan Phase 4 +- [ ] **Roundtable orchestrator** — driver loop over the existing roundtable hook; adaptive pacing ported from Agent-Forge. Plan Phase 5 +- [ ] **`arc roundtable` CLI + team MCP tools** — `arc roundtable --agents a,b,c` with streaming transcript; `arc_chat` / `arc_roundtable` / `team_*` MCP tools. Plan Phase 6 +- [ ] **Dashboard chat view** — per-session WS streaming, tool-call visualization, permission-mode toggle, confirmation modal. Plan Phase 7 +- [ ] **Dashboard roundtable + pipelines view** — configure + run multi-agent flows from the browser with live transcript. 
Plan Phase 8 ## Priority 4 — Observability & Polish diff --git a/README.md b/README.md index 7a5fe47..40c944e 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,16 @@ See [Getting Started](https://arc-cli.dev/docs/guide/getting-started) for requir ## Quick Start +**Fastest path — no profile:** + +```bash +arc run claude # Native passthrough — no env injection, no overlay +arc run gemini +arc run codex +``` + +**Full path — with a profile:** + ```bash arc # Open TUI — onboarding wizard on first run ``` @@ -145,7 +155,8 @@ The onboarding wizard auto-detects installed tools (Claude, Gemini, Codex) and o ```bash arc create work --tool claude --auth-type oauth -arc launch work +arc launch work # native by default (full TTY handoff) +arc launch work --worker # under ARC supervision for hooks/orchestration arc use personal arc status ``` @@ -172,13 +183,19 @@ arc use # Switch active profile arc profile show [name] # Show profile details arc profile delete # Delete a profile arc profile import # Import existing tool config +arc profile switch none # Clear the active profile +arc profile clear-active # Same — activeProfile becomes null arc which # Show resolved profile source ``` ### Launch ```bash -arc launch [name] # Launch agent tool with profile +arc run # Native passthrough (no profile, no overlay) +arc launch [name] # Launch agent tool with profile (native by default) +arc launch [name] --native # Force full TTY handoff +arc launch [name] --worker # Force ARC-supervised mode (for orchestration) +arc launch --bare # Same as `arc run` arc launch [name] -- --model opus # Pass flags through to the tool ``` diff --git a/package.json b/package.json index 4e3a533..cc0b85d 100644 --- a/package.json +++ b/package.json @@ -109,7 +109,8 @@ "tsx": "^4", "typescript": "^6.0", "vitepress": "^1.6.4", - "vitest": "^4.1.2" + "vitest": "^4.1.2", + "zod": "^3.25.0" }, "optionalDependencies": { "@inquirer/prompts": "^8", diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts 
index 0f14cfc..8ec6bf3 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -210,6 +210,7 @@ export function createProgram(): Command { .option("-d, --dashboard", "Start web dashboard alongside agent") .option("--native", "Run the tool with full TTY handoff (ARC exits, tool paints its own TUI)") .option("--worker", "Run the tool under ARC supervision (stdout captured for orchestration)") + .option("--bare", "Skip profile resolution and env injection — spawn the named tool natively") .passThroughOptions() .allowUnknownOption() .allowExcessArguments() @@ -236,12 +237,13 @@ Examples: $ arc launch work --dangerously-skip-permissions $ arc launch work -p "explain this code" $ arc launch -- --model sonnet (use -- when omitting profile name) + $ arc launch --bare claude (native launch, no profile env) ` ) .action( async ( name: string | undefined, - opts: { dashboard?: boolean; native?: boolean; worker?: boolean }, + opts: { dashboard?: boolean; native?: boolean; worker?: boolean; bare?: boolean }, cmd: Command ) => { // Re-inject parsed launch-mode flags into the args array so handleLaunch can see them. @@ -252,7 +254,42 @@ Examples: if (opts.worker) extraArgs.push("--worker"); const mergedArgs = extraArgs.length > 0 ? [...cmd.args, ...extraArgs] : cmd.args; const mod = await import("./commands/launch.js"); - await mod.handleLaunch(name, mergedArgs, { dashboard: opts.dashboard }); + await mod.handleLaunch(name, mergedArgs, { + dashboard: opts.dashboard, + bare: opts.bare, + }); + } + ); + + program + .command("run [args...]") + .description("Run a native agent tool (claude/codex/gemini/…) with no profile env injection") + .passThroughOptions() + .allowUnknownOption() + .allowExcessArguments() + .addHelpText( + "after", + ` +Thin alias for 'arc launch --bare '. Ambient environment only — +no CLAUDE_CONFIG_DIR, GEMINI_CLI_HOME, CODEX_HOME, or ARC env vars set. 
+ +Examples: + $ arc run claude --version + $ arc run gemini --help + $ arc run codex -- some-flag +` + ) + .action( + async ( + tool: string, + _args: string[], + _opts: Record, + cmd: Command + ) => { + // `cmd.args` = [tool, ...rest]; pass the rest through to the binary. + const rest = cmd.args.slice(1); + const mod = await import("./commands/run.js"); + await mod.handleRun(tool, rest); } ); @@ -412,12 +449,27 @@ Examples: profile .command("switch ") .alias("use") - .description("Switch active profile") + .description("Switch active profile (use 'none' or 'off' to clear)") + .addHelpText("after", ` +Examples: + $ arc profile switch work + $ arc profile switch none (clear active profile) + $ arc profile switch off (same — native launches via 'arc run ') +`) .action(async (name: string) => { const mod = await import("./commands/profile.js"); await mod.handleSwitch(name); }); + profile + .command("clear-active") + .alias("clear") + .description("Clear the active profile — tools launch natively via 'arc run'") + .action(async () => { + const mod = await import("./commands/profile.js"); + await mod.handleClearActive(); + }); + profile .command("delete ") .alias("rm") @@ -662,8 +714,8 @@ Examples: } const profileName = name ?? config.activeProfile; - if (!config.profiles[profileName]) { - showError(`Profile "${profileName}" not found.`); + if (!profileName || !config.profiles[profileName]) { + showError(profileName ? `Profile "${profileName}" not found.` : "No active profile — pass a name."); process.exit(1); } @@ -683,10 +735,10 @@ Examples: const config = loadConfig(); const profileName = name ?? config.activeProfile; - const profile = config.profiles[profileName]; + const profile = profileName ? config.profiles[profileName] : undefined; - if (!profile) { - showError(`Profile "${profileName}" not found.`); + if (!profile || !profileName) { + showError(profileName ? 
`Profile "${profileName}" not found.` : "No active profile — pass a name."); process.exit(1); } diff --git a/packages/cli/src/commands/auth.ts b/packages/cli/src/commands/auth.ts index bb0bf53..e8d6299 100644 --- a/packages/cli/src/commands/auth.ts +++ b/packages/cli/src/commands/auth.ts @@ -572,6 +572,11 @@ export async function handleAuthWhoami( const config = loadConfig(); const resolvedName = profileName ?? config.activeProfile; + if (!resolvedName) { + error("No active profile — pass a profile name."); + process.exit(1); + } + const raw = config.profiles[resolvedName]; if (!raw) { error(`Profile "${resolvedName}" not found.`); diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index 7ea9ada..1bc2247 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -158,6 +158,21 @@ async function checkProfiles(): Promise { return; } + // Surface active-profile state first (info row, not a failure). + if (config.activeProfile === null) { + console.log( + ` ${pc.blue("\u2139")} active profile: ${pc.dim("(none)")} \u2014 that's fine; use ${pc.bold(pc.cyan("arc run "))} to launch tools natively` + ); + } else if (!config.profiles[config.activeProfile]) { + console.log( + ` ${pc.yellow("\u26A0")} active profile ${JSON.stringify(config.activeProfile)} references a missing profile` + ); + } else { + console.log( + ` ${pc.green("\u2714")} active profile: ${config.activeProfile}` + ); + } + const names = Object.keys(config.profiles); if (names.length === 0) { console.log(` ${pc.dim("No profiles configured.")}`); diff --git a/packages/cli/src/commands/exec.ts b/packages/cli/src/commands/exec.ts index 8a66cf8..252f9a0 100644 --- a/packages/cli/src/commands/exec.ts +++ b/packages/cli/src/commands/exec.ts @@ -17,10 +17,11 @@ export async function handleExec( if (name && config.profiles[name]) { profileName = name; passthrough = rawArgs.slice(1); - } else if (name) { - profileName = config.activeProfile; - 
passthrough = rawArgs; } else { + if (config.activeProfile === null) { + error("No active profile. Use 'arc profile switch ' or pass a profile argument."); + process.exit(1); + } profileName = config.activeProfile; passthrough = rawArgs; } diff --git a/packages/cli/src/commands/instructions.ts b/packages/cli/src/commands/instructions.ts index ee1a567..3855c00 100644 --- a/packages/cli/src/commands/instructions.ts +++ b/packages/cli/src/commands/instructions.ts @@ -8,6 +8,10 @@ import { success, error, info, warn } from "../display.js"; function getProfile(name?: string) { const config = loadConfig(); const profileName = name ?? config.activeProfile; + if (!profileName) { + error("No active profile. Use 'arc profile switch ' or pass a profile name."); + process.exit(1); + } const profile = config.profiles[profileName]; if (!profile) { error(`Profile "${profileName}" not found.`); diff --git a/packages/cli/src/commands/launch.ts b/packages/cli/src/commands/launch.ts index f2bec0d..c19a82f 100644 --- a/packages/cli/src/commands/launch.ts +++ b/packages/cli/src/commands/launch.ts @@ -25,6 +25,35 @@ import type { AgentProcess } from "@axiom-labs/arc-core"; const isWindows = process.platform === "win32"; +/** + * Known native agent tool binaries used for bare-mode inference. + * When the first positional arg to `arc launch` matches one of these AND no + * profile exists by that name, we auto-infer `--bare` and launch the tool + * natively (no profile env injection). + */ +export const KNOWN_AGENT_TOOLS = new Set([ + "claude", + "codex", + "gemini", + "hermes", + "openclaw", +]); + +/** + * Pure helper — decide whether the caller's first positional arg should + * trigger bare-mode inference. Exposed for tests; kept free of I/O. 
+ */ +export function shouldInferBare( + name: string | undefined, + profileNames: readonly string[], + explicitBare: boolean +): boolean { + if (explicitBare) return true; + if (typeof name !== "string") return false; + if (!KNOWN_AGENT_TOOLS.has(name)) return false; + return !profileNames.includes(name); +} + /** Check whether a command binary is available on PATH. */ export function findBinary(name: string): boolean { const result = isWindows @@ -33,6 +62,55 @@ export function findBinary(name: string): boolean { return result.status === 0; } +/** + * Bare launch: spawn a native tool with the ambient environment only. + * No profile resolution, no env injection (CLAUDE_CONFIG_DIR etc.), no + * hook pipeline, no session/telemetry tracking. This is the "arc is + * optional orchestration" path — the user just wants `claude` to run. + */ +export async function handleBareLaunch( + tool: string, + args: string[], + opts?: { beforeSpawn?: () => void | Promise } +): Promise { + if (!findBinary(tool)) { + error(`Binary "${tool}" not found on PATH.`); + warn(getInstallHint(tool)); + process.exit(1); + } + + logAction("launch", `(bare) ${tool}`); + try { + writeLogEvent({ + level: "info", + component: "launch", + action: "bare:launch", + message: `Bare launch of ${tool}`, + data: { profile: null, tool, args }, + }); + } catch { + // Non-fatal + } + + const flagStr = args.length > 0 ? ` [${args.join(" ")}]` : ""; + info(`Launching ${tool} (bare mode)${flagStr}`); + + if (opts?.beforeSpawn) { + await opts.beforeSpawn(); + } + + const result = isWindows + ? spawnSync("cmd", ["/c", tool, ...args], { stdio: "inherit" }) + : spawnSync(tool, args, { stdio: "inherit" }); + + if (result.error) { + error(`Failed to launch ${tool}: ${result.error.message}`); + process.exit(1); + } + + process.exit(result.status ?? 0); +} + /** Suggest an install command for known agent tool binaries. 
*/ function getInstallHint(tool: string): string { switch (tool) { @@ -59,9 +137,45 @@ export async function handleLaunch( * `worker` mode so stdout can be captured. */ launchMode?: "native" | "worker"; + /** + * Bare mode: skip profile resolution and env injection entirely. Just + * spawn the named tool with ambient env. `name` is then treated as the + * tool binary (claude / codex / gemini / …). + */ + bare?: boolean; + /** Override the tool name in bare mode when it differs from `name`. */ + tool?: string; } ): Promise { const config = loadConfig(); + + // ─── Bare-mode / tool-name inference ──────────────────────────────── + // 1. Explicit opts.bare=true always wins. + // 2. Otherwise, if the first positional arg matches a known native tool + // AND no profile exists by that name, infer bare mode. + const profileNames = Object.keys(config.profiles); + const explicitBare = opts?.bare === true; + const bare = shouldInferBare(name, profileNames, explicitBare); + if (bare && !explicitBare) { + // Emit informational notice (stderr so stdout stays clean for piping). + warn(`no profile named "${name}" \u2014 launching native tool.`); + } + + if (bare) { + const toolName = opts?.tool ?? name; + if (!toolName) { + error("Bare launch requires a tool name (e.g. 'arc run claude')."); + process.exit(1); + } + // In bare mode, if `name` was consumed as the tool, the rest is passthrough. + let barePassthrough = name === toolName ? rawArgs.slice(1) : rawArgs; + if (barePassthrough.length > 0 && barePassthrough[0] === "--") { + barePassthrough = barePassthrough.slice(1); + } + await handleBareLaunch(toolName, barePassthrough, { beforeSpawn: opts?.beforeSpawn }); + return; + } + let profileName: string; let passthrough: string[]; @@ -70,12 +184,21 @@ export async function handleLaunch( profileName = name; passthrough = rawArgs.slice(1); } else if (name) { - // Commander consumed something as name but it's not a valid profile. 
- // Treat everything (including the consumed "name") as passthrough. + // Commander consumed something as name but it's not a valid profile, + // and it's not a known tool either. Fall back to active profile. + if (config.activeProfile === null) { + error(`No profile named "${name}" and no active profile set.`); + warn("Use 'arc run ' for native launch, or switch to a profile with 'arc profile switch '."); + process.exit(1); + } profileName = config.activeProfile; passthrough = rawArgs; } else { // No name provided — active profile, everything is passthrough + if (config.activeProfile === null) { + error("No active profile. Use 'arc run ' for native launch, or 'arc profile switch '."); + process.exit(1); + } profileName = config.activeProfile; passthrough = rawArgs; } diff --git a/packages/cli/src/commands/profile.ts b/packages/cli/src/commands/profile.ts index 9b4d547..5891ef9 100644 --- a/packages/cli/src/commands/profile.ts +++ b/packages/cli/src/commands/profile.ts @@ -101,7 +101,13 @@ export async function handleList(): Promise { export async function handleShow(name?: string): Promise { const config = loadConfig(); - const resolved = resolveProfileName(config, name); + let resolved: string; + try { + resolved = resolveProfileName(config, name); + } catch (err: unknown) { + error(err instanceof Error ? err.message : String(err)); + process.exit(1); + } const rawProfile = config.profiles[resolved]; if (!rawProfile) { error(`Profile "${resolved}" not found.`); @@ -132,6 +138,12 @@ export async function handleShow(name?: string): Promise { } export async function handleSwitch(name: string): Promise { + // Treat "none" / "off" / "" as a request to clear the active profile. + const lowered = (name ?? 
"").toLowerCase(); + if (!name || lowered === "none" || lowered === "off") { + await handleClearActive(); + return; + } const config = loadConfig(); if (!config.profiles[name]) { error(`Profile "${name}" not found.`); @@ -142,6 +154,19 @@ export async function handleSwitch(name: string): Promise { success(`Switched to profile "${name}".`); } +export async function handleClearActive(): Promise { + const config = loadConfig(); + if (config.activeProfile === null) { + info("No active profile — nothing to clear."); + return; + } + const previous = config.activeProfile; + config.activeProfile = null; + saveConfig(config); + success(`Cleared active profile (was "${previous}").`); + info("Launch tools natively with 'arc run ' or pass --profile to commands."); +} + export async function handleDelete(name: string, opts?: { force?: boolean }): Promise { const config = loadConfig(); if (!config.profiles[name]) { @@ -174,8 +199,13 @@ export async function handleDelete(name: string, opts?: { force?: boolean }): Pr delete config.profiles[name]; if (config.activeProfile === name) { const remaining = Object.keys(config.profiles); - config.activeProfile = remaining[0]!; - info(`Active profile switched to "${remaining[0]}".`); + if (remaining.length > 0) { + config.activeProfile = remaining[0]!; + info(`Active profile switched to "${remaining[0]}".`); + } else { + config.activeProfile = null; + info("No profiles remain — active profile cleared."); + } } saveConfig(config); success(`Profile "${name}" deleted.`); diff --git a/packages/cli/src/commands/provider.ts b/packages/cli/src/commands/provider.ts index b128dc6..b50f12f 100644 --- a/packages/cli/src/commands/provider.ts +++ b/packages/cli/src/commands/provider.ts @@ -77,6 +77,10 @@ const PRESETS: ProviderPreset[] = [ function getProfile(name?: string) { const config = loadConfig(); const profileName = name ?? config.activeProfile; + if (!profileName) { + error("No active profile. 
Use 'arc profile switch ' or pass a profile name."); + process.exit(1); + } const profile = config.profiles[profileName]; if (!profile) { error(`Profile "${profileName}" not found.`); diff --git a/packages/cli/src/commands/resolve.ts b/packages/cli/src/commands/resolve.ts index 4c91c0c..bbd47f8 100644 --- a/packages/cli/src/commands/resolve.ts +++ b/packages/cli/src/commands/resolve.ts @@ -11,6 +11,9 @@ export async function handleResolveConfigDir(): Promise { } const resolved = profileName ?? config.activeProfile; + if (!resolved) { + process.exit(1); + } const profile = config.profiles[resolved]; if (!profile) { diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts new file mode 100644 index 0000000..16fc4a7 --- /dev/null +++ b/packages/cli/src/commands/run.ts @@ -0,0 +1,22 @@ +import { handleLaunch } from "./launch.js"; + +/** + * `arc run [args...]` — thin alias for `arc launch --bare `. + * + * Skips all profile resolution and env injection. The named binary is + * launched with the ambient environment so ARC behaves as optional + * orchestration rather than a required wrapper. + */ +export async function handleRun( + tool: string, + args: string[], + opts?: { beforeSpawn?: () => void | Promise } +): Promise { + // Delegate to handleLaunch in bare mode. The first positional to + // handleLaunch is the "name" slot; bare mode consumes it as the tool. + await handleLaunch(tool, [tool, ...args], { + bare: true, + tool, + beforeSpawn: opts?.beforeSpawn, + }); +} diff --git a/packages/cli/src/display.ts b/packages/cli/src/display.ts index 0605161..51181c6 100644 --- a/packages/cli/src/display.ts +++ b/packages/cli/src/display.ts @@ -131,6 +131,20 @@ export function profileTable( ].join("\n"); } +// ── Active Profile Rendering ─────────────────────── + +/** + * Format the active profile name for display. + * Returns `(none)` (dimmed) when no profile is active — this is the + * canonical "bare-mode" marker used across CLI output and TUI. 
+ */ +export function formatActiveProfile(name: string | null | undefined): string { + if (!name) { + return pc.dim("(none)"); + } + return name; +} + // ── Redaction ────────────────────────────────────── export function redact(value: string): string { diff --git a/packages/cli/src/services/health.ts b/packages/cli/src/services/health.ts index 1fdb16d..21cf2c6 100644 --- a/packages/cli/src/services/health.ts +++ b/packages/cli/src/services/health.ts @@ -120,11 +120,16 @@ export async function getRuntimeHealthReport(): Promise { } checks.push({ id: "profiles-present", label: "Profiles configured", status: Object.keys(config.profiles).length > 0 ? "pass" : "fail", summary: Object.keys(config.profiles).length > 0 ? `${Object.keys(config.profiles).length} profiles configured` : "No profiles configured" }); - checks.push({ id: "active-profile", label: "Active profile", status: config.profiles[config.activeProfile] ? "pass" : "fail", summary: config.profiles[config.activeProfile] ? `Active profile is ${config.activeProfile}` : `Active profile ${config.activeProfile} is missing`, profile: config.activeProfile }); + const activeKey = config.activeProfile; + if (activeKey === null) { + checks.push({ id: "active-profile", label: "Active profile", status: "pass", summary: "No active profile (bare mode) — tools launch natively via 'arc run'" }); + } else { + checks.push({ id: "active-profile", label: "Active profile", status: config.profiles[activeKey] ? "pass" : "fail", summary: config.profiles[activeKey] ? `Active profile is ${activeKey}` : `Active profile ${activeKey} is missing`, profile: activeKey }); + } const logDirWritable = canWriteLogDir(); checks.push({ id: "log-dir", label: "Log directory", status: logDirWritable ? "pass" : "fail", summary: logDirWritable ? 
`Log directory writable at ${getLogsDir()}` : `Log directory not writable at ${getLogsDir()}` }); for (const [name, profile] of Object.entries(config.profiles)) { checks.push(...(await buildProfileChecks(name, profile))); } - return buildHealthReport(checks, config.activeProfile); + return buildHealthReport(checks, activeKey ?? undefined); } diff --git a/packages/cli/src/tui/views/DashView.tsx b/packages/cli/src/tui/views/DashView.tsx index 387f8f0..ebf71aa 100644 --- a/packages/cli/src/tui/views/DashView.tsx +++ b/packages/cli/src/tui/views/DashView.tsx @@ -138,7 +138,7 @@ function LeftColumn({ profiles, colors, isDark }: { )} {activeToolLabel && ( @@ -147,6 +147,12 @@ function LeftColumn({ profiles, colors, isDark }: { {activeProfile?.credential?.accountTier && ( )} + {!activeProfile && profiles.length > 0 && ( + + Press c to create a profile, + or run arc run <tool> for native mode. + + )} ); diff --git a/packages/cli/src/tui/views/ProfilesView.tsx b/packages/cli/src/tui/views/ProfilesView.tsx index ada92a0..a4539a8 100644 --- a/packages/cli/src/tui/views/ProfilesView.tsx +++ b/packages/cli/src/tui/views/ProfilesView.tsx @@ -66,7 +66,8 @@ export function ProfilesView({ if (config.activeProfile === deleteTarget) { const remaining = Object.keys(config.profiles); - config.activeProfile = remaining[0] ?? "default"; + // When no profiles remain, clear active entirely. + config.activeProfile = remaining[0] ?? null; } saveConfig(config); @@ -377,6 +378,26 @@ export function ProfilesView({ } return; } + + // [x] clear active profile (tools launch natively via `arc run`) + if (input === "x") { + try { + const config = loadConfig(); + if (config.activeProfile === null) { + showMessage("No active profile — already cleared"); + } else { + const previous = config.activeProfile; + config.activeProfile = null; + saveConfig(config); + showMessage(`Cleared active profile (was ${previous})`); + reload(); + } + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + showMessage(`Clear failed: ${msg}`); + } + return; + } }, { isActive: isActive && inputEnabled } ); @@ -484,7 +505,7 @@ export function ProfilesView({ {!loading && action === "idle" && ( - {"\u21B5"} launch s switch i info m mode d delete h sync shift+h push shift+s source f flags c create shift+c clone + {"\u21B5"} launch s switch x clear i info m mode d delete h sync shift+h push shift+s source f flags c create shift+c clone )} diff --git a/packages/cli/src/tui/views/SessionView.tsx b/packages/cli/src/tui/views/SessionView.tsx index 577359a..060f3f8 100644 --- a/packages/cli/src/tui/views/SessionView.tsx +++ b/packages/cli/src/tui/views/SessionView.tsx @@ -522,10 +522,17 @@ export function SessionView({ ) : ( - - No active profile. Press - c - to create one. + + + No active profile. Use + arc run claude + for native launch, + + + or switch to a profile (press + c + to create one). + )} diff --git a/packages/core/package.json b/packages/core/package.json index 1b49cb9..1ff467a 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -8,5 +8,8 @@ "scripts": { "build": "tsup src/index.ts --format esm --target node20 --clean", "typecheck": "tsc --noEmit" + }, + "dependencies": { + "zod": "^3.25.0" } } diff --git a/packages/core/src/agent/arc-tools.ts b/packages/core/src/agent/arc-tools.ts new file mode 100644 index 0000000..8c5aea5 --- /dev/null +++ b/packages/core/src/agent/arc-tools.ts @@ -0,0 +1,411 @@ +/** + * ARC tool catalog — the concrete set of tools an agent can call against + * ARC's installed state. All handlers delegate to existing core functions; + * no policy logic lives here. + * + * See `docs/plans/ai-and-roundtable.md` AD-3 for the full planned catalog; + * this module ships the Phase 2 baseline (≥15 tools spanning all three + * permission tiers). 
+ */ + +import fs from "node:fs"; +import path from "node:path"; +import { z } from "zod"; +import { loadConfig, saveConfig, cloneProfile } from "../config.js"; +import { getRecentLaunches } from "../history.js"; +import { queryLogEvents, type LogLevel } from "../logging.js"; +import { SkillRegistry } from "../skills/index.js"; +import { PersistentMemory, type MemoryScope } from "../memory/index.js"; +import { TaskStore, type TaskStatus } from "../tasks/index.js"; +import { RemoteAgentRegistry } from "../remote.js"; +import { getSharedSettings } from "../shared.js"; +import type { ToolRegistry } from "./registry.js"; +import type { Tool } from "./types.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const LOG_LEVEL_VALUES = ["debug", "info", "warn", "error"] as const; + +const MEMORY_SCOPE_VALUES = ["session", "persistent", "profile", "team"] as const; + +const TASK_STATUS_VALUES = [ + "created", + "assigned", + "working", + "input-required", + "completed", + "failed", + "cancelled", +] as const; + +/** + * Locate the core package.json at runtime (compiled output lives at + * `packages/core/dist/index.js`, so `../package.json` resolves in both + * source and built form via the `src` → `dist` layout). 
+ */ +function readCorePackageVersion(): string { + const candidates = [ + path.resolve(new URL(".", import.meta.url).pathname, "../../package.json"), + path.resolve(new URL(".", import.meta.url).pathname, "../package.json"), + ]; + for (const candidate of candidates) { + try { + const raw = fs.readFileSync(candidate, "utf-8"); + const parsed = JSON.parse(raw) as { version?: string; name?: string }; + if (parsed.name === "@axiom-labs/arc-core" && typeof parsed.version === "string") { + return parsed.version; + } + } catch { + // try next + } + } + return "unknown"; +} + +// --------------------------------------------------------------------------- +// Read tools +// --------------------------------------------------------------------------- + +const listProfilesTool: Tool = { + name: "list_profiles", + description: "List all ARC profiles with their tool, authType, and description.", + permission: "read", + schema: z.object({}), + handler: async () => { + const cfg = loadConfig(); + return Object.entries(cfg.profiles).map(([name, prof]) => ({ + name, + tool: prof.tool, + authType: prof.authType, + description: prof.description, + createdAt: prof.createdAt, + })); + }, +}; + +const showProfileTool: Tool = { + name: "show_profile", + description: "Return the full record of a single profile by name.", + permission: "read", + schema: z.object({ name: z.string() }), + handler: async (input) => { + const { name } = input as { name: string }; + const cfg = loadConfig(); + const prof = cfg.profiles[name]; + if (!prof) throw new Error(`Profile '${name}' not found`); + return { name, ...prof }; + }, +}; + +const getActiveProfileTool: Tool = { + name: "get_active_profile", + description: "Return the name and record of the currently active profile, or null if none is active.", + permission: "read", + schema: z.object({}), + handler: async () => { + const cfg = loadConfig(); + const name = cfg.activeProfile as string | null; + if (!name) return null; + const prof = 
cfg.profiles[name]; + if (!prof) return null; + return { name, ...prof }; + }, +}; + +const listLaunchesTool: Tool = { + name: "list_launches", + description: "List recent profile launch history entries (most-recent first).", + permission: "read", + schema: z.object({ limit: z.number().int().positive().optional() }), + handler: async (input) => { + const { limit } = input as { limit?: number }; + return getRecentLaunches(limit ?? 10); + }, +}; + +const queryLogsTool: Tool = { + name: "query_logs", + description: "Query ARC's structured log with optional level/component filters.", + permission: "read", + schema: z.object({ + limit: z.number().int().positive().optional(), + level: z.enum(LOG_LEVEL_VALUES).optional(), + component: z.string().optional(), + }), + handler: async (input) => { + const { limit, level, component } = input as { + limit?: number; + level?: LogLevel; + component?: string; + }; + return queryLogEvents({ limit, level, component }); + }, +}; + +const listSkillsTool: Tool = { + name: "list_skills", + description: "List skills in the in-memory SkillRegistry.", + permission: "read", + schema: z.object({}), + handler: async () => { + // SkillRegistry is ephemeral; Phase 2 returns an empty registry snapshot. + // Callers that keep a shared registry should inject one via a future ctx hook. + const registry = new SkillRegistry(); + return registry.list(); + }, +}; + +const listMemoriesTool: Tool = { + name: "list_memories", + description: "List entries from a PersistentMemory scope (default 'persistent').", + permission: "read", + schema: z.object({ + limit: z.number().int().positive().optional(), + scope: z.enum(MEMORY_SCOPE_VALUES).optional(), + }), + handler: async (input) => { + const { limit, scope } = input as { limit?: number; scope?: MemoryScope }; + const store = new PersistentMemory(scope ?? "persistent"); + const entries = store.list(); + return typeof limit === "number" ? 
entries.slice(0, limit) : entries; + }, +}; + +const listTasksTool: Tool = { + name: "list_tasks", + description: "List tasks from the TaskStore, optionally filtered by status.", + permission: "read", + schema: z.object({ status: z.enum(TASK_STATUS_VALUES).optional() }), + handler: async (input) => { + const { status } = input as { status?: TaskStatus }; + const store = new TaskStore(); + return store.list(status ? { status } : undefined); + }, +}; + +const listRemoteAgentsTool: Tool = { + name: "list_remote_agents", + description: "List all registered remote agents and their status.", + permission: "read", + schema: z.object({}), + handler: async () => { + const registry = new RemoteAgentRegistry(); + return registry.list(); + }, +}; + +const listMcpServersTool: Tool = { + name: "list_mcp_servers", + description: "List MCP servers configured in the shared layer settings.", + permission: "read", + schema: z.object({}), + handler: async () => { + const settings = getSharedSettings(); + if (!settings) return []; + const servers = settings["mcpServers"]; + if (!servers || typeof servers !== "object") return []; + return Object.entries(servers as Record).map(([name, def]) => ({ + name, + config: def, + })); + }, +}; + +const getArcVersionTool: Tool = { + name: "get_arc_version", + description: "Return the installed ARC core package version string.", + permission: "read", + schema: z.object({}), + handler: async () => { + return { version: readCorePackageVersion() }; + }, +}; + +// --------------------------------------------------------------------------- +// Write tools +// --------------------------------------------------------------------------- + +const cloneProfileTool: Tool = { + name: "clone_profile", + description: "Clone a profile to a new name. 
Optionally copy its configDir on disk.", + permission: "write", + schema: z.object({ + src: z.string(), + dst: z.string(), + copyConfigDir: z.boolean().optional(), + }), + handler: async (input) => { + const { src, dst, copyConfigDir } = input as { + src: string; + dst: string; + copyConfigDir?: boolean; + }; + const cfg = loadConfig(); + const updated = cloneProfile(cfg, src, dst, { copyConfigDir }); + saveConfig(updated); + return { src, dst, ok: true }; + }, +}; + +const switchActiveProfileTool: Tool = { + name: "switch_active_profile", + description: "Set the active ARC profile. Pass null to clear the active profile.", + permission: "write", + schema: z.object({ name: z.string().nullable() }), + handler: async (input) => { + const { name } = input as { name: string | null }; + const cfg = loadConfig(); + if (name !== null && !cfg.profiles[name]) { + throw new Error(`Profile '${name}' not found`); + } + // Guard: activeProfile may be `string | null` concurrently. + (cfg as { activeProfile: string | null }).activeProfile = name; + saveConfig(cfg as typeof cfg); + return { activeProfile: name, ok: true }; + }, +}; + +const setProfileInstructionsTool: Tool = { + name: "set_profile_instructions", + description: + "Set the `instructions` or `instructionsFile` field of a profile. 
Omit both to clear.", + permission: "write", + schema: z.object({ + profileName: z.string(), + instructions: z.string().optional(), + instructionsFile: z.string().optional(), + }), + handler: async (input) => { + const { profileName, instructions, instructionsFile } = input as { + profileName: string; + instructions?: string; + instructionsFile?: string; + }; + const cfg = loadConfig(); + const prof = cfg.profiles[profileName]; + if (!prof) throw new Error(`Profile '${profileName}' not found`); + if (instructions === undefined) { + delete prof.instructions; + } else { + prof.instructions = instructions; + } + if (instructionsFile === undefined) { + delete prof.instructionsFile; + } else { + prof.instructionsFile = instructionsFile; + } + saveConfig(cfg); + return { + profileName, + instructions: prof.instructions, + instructionsFile: prof.instructionsFile, + }; + }, +}; + +const setProfileFlagsTool: Tool = { + name: "set_profile_flags", + description: "Replace the launchArgs array on a profile.", + permission: "write", + schema: z.object({ + profileName: z.string(), + flags: z.array(z.string()), + }), + handler: async (input) => { + const { profileName, flags } = input as { profileName: string; flags: string[] }; + const cfg = loadConfig(); + const prof = cfg.profiles[profileName]; + if (!prof) throw new Error(`Profile '${profileName}' not found`); + prof.launchArgs = [...flags]; + saveConfig(cfg); + return { profileName, launchArgs: prof.launchArgs }; + }, +}; + +// --------------------------------------------------------------------------- +// Dangerous tools +// --------------------------------------------------------------------------- + +const deleteProfileTool: Tool = { + name: "delete_profile", + description: + "Delete a profile from ARC config. 
Does NOT remove the profile's configDir on disk.", + permission: "dangerous", + schema: z.object({ name: z.string() }), + handler: async (input) => { + const { name } = input as { name: string }; + const cfg = loadConfig(); + if (!cfg.profiles[name]) { + throw new Error(`Profile '${name}' not found`); + } + delete cfg.profiles[name]; + const activeProfile = cfg.activeProfile as string | null; + if (activeProfile === name) { + (cfg as { activeProfile: string | null }).activeProfile = null; + } + if (Array.isArray(cfg.profileOrder)) { + cfg.profileOrder = cfg.profileOrder.filter((n) => n !== name); + } + saveConfig(cfg); + return { name, deleted: true }; + }, +}; + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +/** + * Register ARC's Phase 2 tool catalog on the given registry. + * Safe to call once at startup; attempting to register the same tool twice + * will throw via the registry's duplicate-name guard. + */ +export function registerArcTools(registry: ToolRegistry): void { + const tools = [ + // Read + listProfilesTool, + showProfileTool, + getActiveProfileTool, + listLaunchesTool, + queryLogsTool, + listSkillsTool, + listMemoriesTool, + listTasksTool, + listRemoteAgentsTool, + listMcpServersTool, + getArcVersionTool, + // Write + cloneProfileTool, + switchActiveProfileTool, + setProfileInstructionsTool, + setProfileFlagsTool, + // Dangerous + deleteProfileTool, + ]; + + for (const tool of tools) { + registry.register(tool); + } +} + +/** Exposed for tests and introspection. 
*/ +export const ARC_TOOLS = Object.freeze({ + list_profiles: listProfilesTool, + show_profile: showProfileTool, + get_active_profile: getActiveProfileTool, + list_launches: listLaunchesTool, + query_logs: queryLogsTool, + list_skills: listSkillsTool, + list_memories: listMemoriesTool, + list_tasks: listTasksTool, + list_remote_agents: listRemoteAgentsTool, + list_mcp_servers: listMcpServersTool, + get_arc_version: getArcVersionTool, + clone_profile: cloneProfileTool, + switch_active_profile: switchActiveProfileTool, + set_profile_instructions: setProfileInstructionsTool, + set_profile_flags: setProfileFlagsTool, + delete_profile: deleteProfileTool, +}); diff --git a/packages/core/src/agent/index.ts b/packages/core/src/agent/index.ts new file mode 100644 index 0000000..8e9dd0d --- /dev/null +++ b/packages/core/src/agent/index.ts @@ -0,0 +1,25 @@ +/** + * Agent tool-use public surface (Phase 2). + * + * See `docs/plans/ai-and-roundtable.md` — AD-2, AD-3. + */ + +export type { + PermissionMode, + ToolPermission, + ToolContext, + Tool, + ToolDefinition, + ToolResult, + AgentEvent, + AgentLoopOptions, + ToolRegistryLike, +} from "./types.js"; + +export { canUseTool, needsConfirmation } from "./permissions.js"; + +export { ToolRegistry } from "./registry.js"; + +export { runAgent } from "./loop.js"; + +export { registerArcTools, ARC_TOOLS } from "./arc-tools.js"; diff --git a/packages/core/src/agent/loop.ts b/packages/core/src/agent/loop.ts new file mode 100644 index 0000000..c6c4966 --- /dev/null +++ b/packages/core/src/agent/loop.ts @@ -0,0 +1,117 @@ +/** + * Agent loop — observe `AgentClient` chunks and dispatch tool calls through + * the registry, re-emitting everything as `AgentEvent`s to the caller. + * + * TODO (Phase 4): the one-shot `AgentClient` from Phase 1 cannot accept + * tool_result messages as additional input to the same session. 
This loop + * therefore only *observes* tool calls an agent makes and dispatches them + * locally; it does not round-trip `tool_result` back to the LLM. True + * multi-turn tool use (model sees result, continues reasoning) requires + * persistent-session support — to be added when we build the interactive + * agent client in Phase 4 and the roundtable orchestrator in Phase 5. + */ + +import type { AgentChunk } from "../agent-client/index.js"; +import type { AgentEvent, AgentLoopOptions, ToolResult } from "./types.js"; + +const DEFAULT_MAX_TURNS = 10; + +/** + * Consume an `AgentClient.send(...)` stream, dispatching tool calls through + * the registry and re-emitting text / thinking / tool_call / tool_result / + * error / done events. + * + * The loop completes when the client emits `{type:"done"}`, when `maxTurns` + * tool-call dispatches are reached, or when the client ends its stream. + */ +export async function* runAgent( + opts: AgentLoopOptions, + userPrompt: string, +): AsyncIterable { + const { client, registry, ctx } = opts; + const maxTurns = opts.maxTurns ?? DEFAULT_MAX_TURNS; + let toolCalls = 0; + + let stream: AsyncIterable; + try { + stream = client.send(userPrompt); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + yield { type: "error", message: `client.send failed: ${msg}` }; + yield { type: "done", reason: "error" }; + return; + } + + try { + for await (const chunk of stream) { + switch (chunk.type) { + case "text": + yield { type: "text", content: chunk.content }; + break; + + case "thinking": + yield { type: "thinking", content: chunk.content }; + break; + + case "tool_call": { + toolCalls += 1; + yield { + type: "tool_call", + id: chunk.id, + tool: chunk.tool, + input: chunk.input, + }; + + let result: ToolResult; + if (toolCalls > maxTurns) { + result = { + ok: false, + blocked: true, + error: `maxTurns (${maxTurns}) exceeded`, + }; + } else { + result = await registry.execute(chunk.tool, chunk.input, ctx); + } + + yield { + type: "tool_result", + id: chunk.id, + tool: chunk.tool, + result, + }; + + if (toolCalls > maxTurns) { + yield { type: "done", reason: "max_turns" }; + return; + } + break; + } + + case "tool_result": + // Client-side tool results (from MCP side-channel) — surface as-is + // but we don't have registry-level structure for them; wrap to ToolResult. + yield { + type: "tool_result", + id: chunk.id, + tool: "", + result: { ok: !chunk.isError, output: chunk.result } as ToolResult, + }; + break; + + case "error": + yield { type: "error", message: chunk.message }; + break; + + case "done": + yield { type: "done", reason: chunk.reason }; + return; + } + } + // Stream ended without an explicit `done` — emit one for callers. + yield { type: "done", reason: "stop" }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + yield { type: "error", message: msg }; + yield { type: "done", reason: "error" }; + } +} diff --git a/packages/core/src/agent/permissions.ts b/packages/core/src/agent/permissions.ts new file mode 100644 index 0000000..a6ed1fb --- /dev/null +++ b/packages/core/src/agent/permissions.ts @@ -0,0 +1,39 @@ +/** + * Permission gating helpers for the agent tool registry. 
+ * + * The matrix: + * + * | mode | read | write | dangerous | + * |-------------|------|------------------------|------------------------| + * | read-only | yes | hidden | hidden | + * | supervised | yes | yes (needs confirm) | yes (needs confirm) | + * | autonomous | yes | yes (no confirm) | yes (no confirm) | + */ + +import type { PermissionMode, Tool } from "./types.js"; + +/** + * Whether a tool may be listed and invoked under the given mode. + * + * In `read-only` mode, only `read` tools are available. Other modes expose + * everything — the separate `needsConfirmation` gate decides whether the + * human must approve. + */ +export function canUseTool(tool: Tool, mode: PermissionMode): boolean { + if (mode === "read-only") { + return tool.permission === "read"; + } + return true; +} + +/** + * Whether a handler call must be preceded by `ctx.confirm(...)`. + * + * Confirmation is only required in `supervised` mode for `write` or + * `dangerous` tools. `read-only` never reaches write/dangerous tools; in + * `autonomous` the operator has pre-authorized everything. + */ +export function needsConfirmation(tool: Tool, mode: PermissionMode): boolean { + if (mode !== "supervised") return false; + return tool.permission === "write" || tool.permission === "dangerous"; +} diff --git a/packages/core/src/agent/registry.ts b/packages/core/src/agent/registry.ts new file mode 100644 index 0000000..b59c1e9 --- /dev/null +++ b/packages/core/src/agent/registry.ts @@ -0,0 +1,226 @@ +/** + * ToolRegistry — central dispatcher for agent tool calls. + * + * Responsibilities: + * 1. Store `Tool` definitions with unique names. + * 2. Expose `ToolDefinition`s to LLMs filtered by permission mode. + * 3. Validate inputs against each tool's zod schema before dispatch. + * 4. Enforce permission gating (`canUseTool`) and confirmation gating + * (`needsConfirmation`) before calling the handler. + * 5. Catch handler errors and return a structured `ToolResult`. 
+ */ + +import type { z } from "zod"; +import { writeLogEvent } from "../logging.js"; +import { canUseTool, needsConfirmation } from "./permissions.js"; +import type { + PermissionMode, + Tool, + ToolContext, + ToolDefinition, + ToolResult, +} from "./types.js"; + +// --------------------------------------------------------------------------- +// Minimal zod → JSON-schema shim +// --------------------------------------------------------------------------- + +/** + * Inline, dependency-free converter covering the zod shapes we actually use + * in ARC tool definitions: objects of primitives, optionals, arrays, enums. + * We intentionally do NOT re-implement the full spec — any exotic shape falls + * through to `{}` which the LLM will treat as "any JSON". + */ +function zodToSchema(schema: z.ZodTypeAny): Record { + const def = (schema as { _def?: { typeName?: string } })._def; + const typeName = def?.typeName; + + switch (typeName) { + case "ZodString": + return { type: "string" }; + case "ZodNumber": + return { type: "number" }; + case "ZodBoolean": + return { type: "boolean" }; + case "ZodEnum": { + const values = (def as { values?: readonly string[] }).values ?? []; + return { type: "string", enum: [...values] }; + } + case "ZodArray": { + const inner = (def as { type?: z.ZodTypeAny }).type; + return { type: "array", items: inner ? zodToSchema(inner) : {} }; + } + case "ZodOptional": + case "ZodNullable": { + const inner = (def as { innerType?: z.ZodTypeAny }).innerType; + return inner ? zodToSchema(inner) : {}; + } + case "ZodObject": { + const shape = (def as { shape?: () => Record }).shape?.() ?? 
{}; + const properties: Record = {}; + const required: string[] = []; + for (const [key, child] of Object.entries(shape)) { + properties[key] = zodToSchema(child); + const childDef = (child as { _def?: { typeName?: string } })._def; + if (childDef?.typeName !== "ZodOptional" && childDef?.typeName !== "ZodDefault") { + required.push(key); + } + } + const out: Record = { type: "object", properties }; + if (required.length > 0) out["required"] = required; + return out; + } + case "ZodDefault": { + const inner = (def as { innerType?: z.ZodTypeAny }).innerType; + return inner ? zodToSchema(inner) : {}; + } + default: + return {}; + } +} + +// --------------------------------------------------------------------------- +// ToolRegistry +// --------------------------------------------------------------------------- + +// Tool is used internally to avoid variance issues when callers +// register typed Tool values — TypeScript treats `unknown` +// contravariantly on handler input, so `Tool<{...}, ...>` is not assignable +// to `Tool` without `any` on the storage side. +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type AnyTool = Tool; + +export class ToolRegistry { + private tools = new Map(); + + /** Register a tool. Throws if a tool with the same name already exists. */ + register(tool: AnyTool): void { + if (this.tools.has(tool.name)) { + throw new Error(`Tool '${tool.name}' is already registered`); + } + this.tools.set(tool.name, tool); + } + + /** Check whether a tool with the given name is registered. */ + has(name: string): boolean { + return this.tools.has(name); + } + + /** Retrieve a tool by name (or undefined if missing). */ + get(name: string): AnyTool | undefined { + return this.tools.get(name); + } + + /** + * List registered tools. If `filter` is provided, only tools matching the + * predicate are returned. + */ + list(filter?: (t: AnyTool) => boolean): AnyTool[] { + const all = [...this.tools.values()]; + return filter ? 
all.filter(filter) : all; + } + + /** + * Produce the public `ToolDefinition[]` that should be sent to the LLM for + * the given permission mode. + * + * If `modeFilter` is omitted, returns definitions for every registered + * tool regardless of permission tier. + */ + getSchemas(modeFilter?: PermissionMode): ToolDefinition[] { + return this.list((t) => (modeFilter ? canUseTool(t, modeFilter) : true)).map((t) => ({ + name: t.name, + description: t.description, + permission: t.permission, + inputSchema: zodToSchema(t.schema as z.ZodTypeAny), + })); + } + + /** + * Validate + dispatch a tool call. + * + * Returns a `ToolResult` — never throws for ordinary failure paths + * (unknown tool, validation failure, permission denial, handler error). + */ + async execute(name: string, input: unknown, ctx: ToolContext): Promise { + const tool = this.tools.get(name); + if (!tool) { + return { ok: false, error: `Unknown tool: ${name}` }; + } + + // Permission tier vs. current mode. + if (!canUseTool(tool, ctx.mode)) { + writeLogEvent({ + level: "warn", + component: "agent:registry", + action: "tool-blocked", + message: `Tool '${name}' blocked under permission mode '${ctx.mode}'`, + data: { tool: name, permission: tool.permission, mode: ctx.mode }, + }); + return { + ok: false, + blocked: true, + error: `Tool '${name}' is not available in '${ctx.mode}' mode (requires '${tool.permission}')`, + }; + } + + // Input validation via zod. + const parsed = tool.schema.safeParse(input); + if (!parsed.success) { + const issues = parsed.error.issues + .map((i) => `${i.path.join(".") || ""}: ${i.message}`) + .join("; "); + return { ok: false, error: `Invalid input for '${name}': ${issues}` }; + } + + // Supervised write/dangerous → human confirmation. + if (needsConfirmation(tool, ctx.mode)) { + const prompt = + tool.permission === "dangerous" + ? `DANGEROUS: run tool '${name}'? ${tool.description}` + : `Run tool '${name}'? 
${tool.description}`; + let approved = false; + try { + approved = await ctx.confirm(prompt); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { ok: false, error: `Confirmation failed: ${msg}` }; + } + if (!approved) { + writeLogEvent({ + level: "info", + component: "agent:registry", + action: "tool-declined", + message: `User declined tool '${name}'`, + data: { tool: name, permission: tool.permission }, + }); + return { ok: false, blocked: true, error: `User declined tool '${name}'` }; + } + } + + // Dispatch. + ctx.log(`tool:${name} dispatching`); + writeLogEvent({ + level: "info", + component: "agent:registry", + action: "tool-dispatch", + message: `Executing tool '${name}'`, + data: { tool: name, permission: tool.permission, mode: ctx.mode }, + }); + + try { + const output = await tool.handler(parsed.data, ctx); + return { ok: true, output }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + writeLogEvent({ + level: "error", + component: "agent:registry", + action: "tool-error", + message: `Tool '${name}' threw: ${msg}`, + data: { tool: name }, + }); + return { ok: false, error: msg }; + } + } +} diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts new file mode 100644 index 0000000..448fcd7 --- /dev/null +++ b/packages/core/src/agent/types.ts @@ -0,0 +1,139 @@ +/** + * Agent tool-use types — Phase 2 (tool registry + agent loop). + * + * These are the core primitives for the generic tool-use infrastructure that + * lets an agent (driven by `AgentClient` from Phase 1) call ARC functionality + * through a permission-gated registry. + * + * See `docs/plans/ai-and-roundtable.md` — AD-2, AD-3. 
+ */ + +import type { z } from "zod"; +import type { Profile } from "../types.js"; +import type { AgentClient } from "../agent-client/index.js"; + +// --------------------------------------------------------------------------- +// Permission model +// --------------------------------------------------------------------------- + +/** + * Permission mode controlling which tools an agent may invoke and whether a + * human-in-the-loop confirmation is required before write/dangerous calls. + * + * - `read-only` — only `read` tools are exposed to the model. + * - `supervised` — all tools exposed; `write` and `dangerous` require + * confirmation via `ToolContext.confirm`. + * - `autonomous` — all tools exposed; no confirmation prompts. Every action + * is still logged. + */ +export type PermissionMode = "read-only" | "supervised" | "autonomous"; + +/** + * Permission tier a tool requires. + * + * - `read` — side-effect-free inspection of local state. + * - `write` — mutates ARC config or local state; reversible. + * - `dangerous` — destructive or otherwise high-risk operation. + */ +export type ToolPermission = "read" | "write" | "dangerous"; + +// --------------------------------------------------------------------------- +// Tool context +// --------------------------------------------------------------------------- + +/** + * Runtime context passed to every tool handler. Wraps permission mode, + * user-confirmation callback, structured logger, and (optional) active profile. + */ +export interface ToolContext { + mode: PermissionMode; + /** + * Prompt the human for confirmation. Returns `true` to proceed, `false` to + * block. Tool handlers should *not* call this directly — the registry calls + * it before dispatching to the handler when the permission tier + mode + * demand it. Handlers may still use it ad-hoc for multi-step confirmations. + */ + confirm: (prompt: string) => Promise; + /** Structured info-level log sink. Non-throwing. 
*/ + log: (msg: string) => void; + /** Active profile, when the caller has resolved one. May be absent. */ + profile?: Profile; +} + +// --------------------------------------------------------------------------- +// Tool definition +// --------------------------------------------------------------------------- + +/** + * A registered tool. `Input` is validated against `schema` before the handler + * is called. `Output` is opaque to the registry — handlers may return any JSON + * value. + */ +export interface Tool { + name: string; + description: string; + permission: ToolPermission; + schema: z.ZodSchema; + handler: (input: Input, ctx: ToolContext) => Promise; +} + +/** + * Public form of a tool sent to LLMs (no handler, no zod object — just + * name + description + a JSON-Schema-ish shape the model can reason about). + * + * We intentionally keep `inputSchema` shape-agnostic (`Record`) + * so that callers can use `zod-to-json-schema` or any other converter without + * this module taking on that dependency. + */ +export interface ToolDefinition { + name: string; + description: string; + permission: ToolPermission; + inputSchema: Record; +} + +// --------------------------------------------------------------------------- +// Tool result +// --------------------------------------------------------------------------- + +/** + * Result of executing a tool via the registry. + * + * `blocked: true` indicates the tool was refused (permission gate or + * confirmation declined) — distinct from a handler-thrown error. + */ +export type ToolResult = + | { ok: true; output: unknown } + | { ok: false; error: string; blocked?: boolean }; + +// --------------------------------------------------------------------------- +// Agent loop +// --------------------------------------------------------------------------- + +/** + * Events emitted by `runAgent` as it observes an agent session. 
+ */ +export type AgentEvent = + | { type: "text"; content: string } + | { type: "thinking"; content: string } + | { type: "tool_call"; id: string; tool: string; input: unknown } + | { type: "tool_result"; id: string; tool: string; result: ToolResult } + | { type: "error"; message: string } + | { type: "done"; reason: "end_turn" | "max_turns" | "stop" | "error" }; + +/** + * Forward declaration — avoids a circular type import. The concrete + * `ToolRegistry` class lives in `./registry.ts`. + */ +export interface ToolRegistryLike { + execute(name: string, input: unknown, ctx: ToolContext): Promise; + has(name: string): boolean; +} + +export interface AgentLoopOptions { + client: AgentClient; + registry: ToolRegistryLike; + ctx: ToolContext; + /** Safety cap on number of tool-call cycles. Default: 10. */ + maxTurns?: number; +} diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index e7e6ab8..162adfe 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -8,7 +8,9 @@ import type { ArcConfig, Profile } from "./types.js"; const AUTH_TYPES = new Set(["oauth", "api-key", "bedrock", "vertex", "foundry", "openai-compat"]); export function defaultConfig(): ArcConfig { - return { version: 1, activeProfile: "default", profiles: {} }; + // New installs start with no active profile — tools launched through + // ARC without an explicit profile default to native (bare) mode. + return { version: 1, activeProfile: null, profiles: {} }; } export function validateConfig(config: unknown): config is ArcConfig { @@ -17,7 +19,11 @@ export function validateConfig(config: unknown): config is ArcConfig { } const obj = config as Record; - if (obj["version"] !== 1 || typeof obj["activeProfile"] !== "string") { + if (obj["version"] !== 1) { + return false; + } + // activeProfile is either a string (named profile) or null (none). 
+ if (obj["activeProfile"] !== null && typeof obj["activeProfile"] !== "string") { return false; } @@ -98,11 +104,22 @@ export function saveConfig(config: ArcConfig): void { } export function getActiveProfile(config: ArcConfig): Profile | undefined { + if (config.activeProfile === null) return undefined; return config.profiles[config.activeProfile]; } +/** + * Resolve a profile name for a command. + * Throws when no `name` is provided and no active profile is set. + */ export function resolveProfileName(config: ArcConfig, name?: string): string { - return name ?? config.activeProfile; + if (name) return name; + if (config.activeProfile === null) { + throw new Error( + "No active profile. Use 'arc profile switch ' or pass --profile." + ); + } + return config.activeProfile; } const MAX_INHERITANCE_DEPTH = 10; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index ccaf2a2..122a69e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -7,6 +7,8 @@ export * from "./keyring.js"; export * from "./secrets/index.js"; export * from "./history.js"; export * from "./agent-client/index.js"; +export * from "./agent/index.js"; +export * from "./knowledge/index.js"; export * from "./lifecycle.js"; export * from "./logging.js"; export * from "./paths.js"; diff --git a/packages/core/src/knowledge/feature-index.ts b/packages/core/src/knowledge/feature-index.ts new file mode 100644 index 0000000..a57aa37 --- /dev/null +++ b/packages/core/src/knowledge/feature-index.ts @@ -0,0 +1,316 @@ +/** + * Curated index of ARC features with status and short summaries. Fed into + * the system prompt so the assistant can talk accurately about what ARC + * can and cannot do. Keep roughly in sync with FEATURES.md — this file is + * authoritative for what the AI layer sees. 
+ */ + +export type FeatureStatus = "shipped" | "roadmap" | "deferred"; + +export interface FeatureEntry { + id: string; + name: string; + status: FeatureStatus; + summary: string; + since?: string; + docLink?: string; +} + +export const FEATURES_INDEX: FeatureEntry[] = [ + { + id: "profile-management", + name: "Profile management", + status: "shipped", + summary: + "Create, clone, import, export, switch, and delete isolated profiles with per-profile config dirs, env, and auth.", + since: "0.1.0", + docLink: "/docs/profiles", + }, + { + id: "profile-inheritance", + name: "Profile inheritance", + status: "shipped", + summary: + "A profile may inherit env, hooks, and fields from a parent profile via the `inherits` key.", + since: "1.2.0", + docLink: "/docs/profiles", + }, + { + id: "bare-launch", + name: "Bare launch", + status: "shipped", + summary: + "Launch the tool binary directly with profile env — no hook wrapping, no adapter overhead.", + since: "0.3.0", + docLink: "/docs/advanced", + }, + { + id: "native-launch", + name: "Native adapter launch", + status: "shipped", + summary: + "Launch through a tool's native plugin/adapter when available (Claude Code plugin, OpenClaw plugin).", + since: "1.0.0", + }, + { + id: "worker-mode", + name: "Worker permission mode", + status: "shipped", + summary: + "Restricted permission policy for non-interactive worker launches (explicit allowlist, no approvals).", + since: "1.4.0", + }, + { + id: "doctor", + name: "Doctor diagnostics", + status: "shipped", + summary: + "Environment checks (PATH, auth, writable config dir) with inline repair hints.", + since: "0.2.0", + docLink: "/docs/troubleshooting", + }, + { + id: "shared-layer", + name: "Shared layer", + status: "shipped", + summary: + "~/.arc/shared/ syncs MCP servers, commands, CLAUDE.md, memory, and projects across opted-in profiles.", + since: "1.1.0", + docLink: "/docs/advanced", + }, + { + id: "credential-hotswap", + name: "Credential hot-swap", + status: "shipped", + 
summary: + "[experimental] Capture and swap tool auth credentials in the canonical tool dir without touching MCPs, settings, or history.", + since: "1.3.0", + }, + { + id: "agent-instructions", + name: "Agent instructions", + status: "shipped", + summary: + "Per-profile prompt text resolved at launch and injected as ARC_AGENT_INSTRUCTIONS env var.", + since: "1.6.0", + }, + { + id: "openai-compat-providers", + name: "OpenAI-compatible providers", + status: "shipped", + summary: + "Custom providers (baseUrl + model + apiKeyEnvVar) with presets for OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, DeepSeek.", + since: "1.6.0", + }, + { + id: "hook-pipeline", + name: "Hook pipeline", + status: "shipped", + summary: + "Priority-ordered bus of 8 hooks around every agent turn; per-profile enforcement modes (off/log/advise/enforce).", + since: "1.4.0", + }, + { + id: "risk-classification", + name: "Risk classification", + status: "shipped", + summary: + "Hook that classifies agent output into low/medium/high/critical tiers to gate supervision.", + since: "1.4.0", + }, + { + id: "source-classification", + name: "Source classification", + status: "shipped", + summary: "Hook that tags message sources (user, agent, system, tool) for downstream routing.", + since: "1.4.0", + }, + { + id: "interagent-routing", + name: "Interagent routing", + status: "shipped", + summary: "Hook that routes messages between agents in multi-agent sessions.", + since: "1.5.0", + }, + { + id: "supervision-gate", + name: "Supervision gate", + status: "shipped", + summary: + "Hook that pauses high-risk actions for human approval or LLM review depending on permission mode.", + since: "1.5.0", + }, + { + id: "post-verify", + name: "Post-verify hook", + status: "shipped", + summary: "Runs after agent output to verify claims and detect hallucinated state.", + since: "1.5.0", + }, + { + id: "roundtable", + name: "Roundtable", + status: "shipped", + summary: + "Multi-agent discussion protocol — 
several agents answer the same prompt, synthesized into a single verdict via the hook bus.", + since: "1.5.0", + }, + { + id: "task-delegation", + name: "Task delegation", + status: "shipped", + summary: "Persisted tasks with priority, status, assignee, and interagent message bus.", + since: "1.5.0", + }, + { + id: "sessions", + name: "Session continuity", + status: "shipped", + summary: + "Session threads persisted across launches; resume by id or most-recent detection.", + since: "1.5.0", + }, + { + id: "memory", + name: "Memory system", + status: "shipped", + summary: + "Scoped, typed, decay-scored memory with session and persistent stores and full-text search.", + since: "1.4.0", + }, + { + id: "skills", + name: "Skill registry", + status: "shipped", + summary: + "Load skills from directories, convert MCP tools into skills, detect repeated patterns, auto-generate skills.", + since: "1.5.0", + }, + { + id: "telemetry", + name: "Telemetry (OpenTelemetry)", + status: "shipped", + summary: + "OTel spans around sessions, preflight, hooks, agent execution, tool use, postflight, circuit breakers; exporters for console, JSON file, OTLP.", + since: "1.4.0", + }, + { + id: "sync", + name: "Cloud sync", + status: "shipped", + summary: "Filesystem-based sync provider + manager with delta/change tracking.", + since: "1.4.0", + }, + { + id: "plugins", + name: "Plugin registry", + status: "shipped", + summary: "Install, enable, disable, and list plugins with capability manifests.", + since: "1.5.0", + }, + { + id: "remote-agents", + name: "Remote agents", + status: "shipped", + summary: "Register remote agents over HTTP/MCP transports with health checks.", + since: "1.5.0", + }, + { + id: "factory-mode", + name: "Dark Factory Mode", + status: "shipped", + summary: "Parallel wave execution of agent specs for batch/background work.", + since: "1.5.0", + }, + { + id: "dashboard", + name: "Web dashboard", + status: "shipped", + summary: + "React web UI with 13 views (overview, 
sessions, traces, risk, tasks, skills, memory, agents, factory, profiles, diagnostics, sync, plugins).", + since: "1.5.0", + }, + { + id: "mcp-integration", + name: "MCP integration", + status: "shipped", + summary: "Per-profile MCP server declarations reachable from the launched tool.", + since: "1.2.0", + }, + { + id: "backup-restore", + name: "Backup and restore", + status: "shipped", + summary: "Snapshot ~/.arc, list snapshots, and restore with confirmation.", + since: "1.3.0", + }, + { + id: "onboarding", + name: "First-run onboarding", + status: "shipped", + summary: + "Fullscreen TUI wizard for multi-select tool import, rename, and batch profile creation.", + since: "1.2.0", + docLink: "/docs/getting-started", + }, + { + id: "workspace-shell", + name: "Workspace shell", + status: "shipped", + summary: + "TUI shell with tokenized input (syntax highlighting + auto-complete) for mixing shell commands and /arc actions.", + since: "1.6.0", + }, + { + id: "ai-assistant", + name: "In-app AI assistant", + status: "roadmap", + summary: + "Embedded assistant with ARC domain knowledge and tool access for profile/task/session operations (AD-6).", + }, + { + id: "knowledge-endowment", + name: "Knowledge endowment layer", + status: "shipped", + summary: + "System-prompt composition layer (static knowledge + live state) powering the in-app assistant (AD-6 Phase 3).", + since: "1.7.0", + }, +]; + +function normalize(s: string): string { + return s.toLowerCase().replace(/[\s_-]+/g, ""); +} + +/** + * Look up a feature by id or name with simple fuzzy matching: + * exact id → exact name → normalized contains. 
+ */
+export function getFeature(idOrName: string): FeatureEntry | undefined {
+  const q = (idOrName ?? "").trim();
+  const qn = normalize(q);
+  if (!qn) return undefined; // blank/separator-only query: an empty key would substring-match every entry
+
+  // exact id
+  const byId = FEATURES_INDEX.find((f) => f.id === q);
+  if (byId) return byId;
+
+  // exact name (case-insensitive)
+  const byName = FEATURES_INDEX.find((f) => f.name.toLowerCase() === q.toLowerCase());
+  if (byName) return byName;
+
+  // normalized id contains
+  const byIdFuzzy = FEATURES_INDEX.find((f) => normalize(f.id).includes(qn));
+  if (byIdFuzzy) return byIdFuzzy;
+
+  // normalized name contains
+  const byNameFuzzy = FEATURES_INDEX.find((f) => normalize(f.name).includes(qn));
+  if (byNameFuzzy) return byNameFuzzy;
+
+  return undefined;
+}
+
+/** Filter features by status. */
+export function getFeaturesByStatus(status: FeatureStatus): FeatureEntry[] {
+  return FEATURES_INDEX.filter((f) => f.status === status);
+}
diff --git a/packages/core/src/knowledge/index.ts b/packages/core/src/knowledge/index.ts
new file mode 100644
index 0000000..a9ba0e2
--- /dev/null
+++ b/packages/core/src/knowledge/index.ts
@@ -0,0 +1,31 @@
+/**
+ * ARC knowledge layer — static domain knowledge + runtime prompt composer
+ * powering the in-app AI assistant (AD-6 Phase 3).
+ */
+
+export {
+  ARC_KNOWLEDGE,
+  COMMANDS_CATALOG,
+  type ArcKnowledge,
+  type CommandDoc,
+  type CommandCategory,
+} from "./static.js";
+
+export {
+  FEATURES_INDEX,
+  getFeature,
+  getFeaturesByStatus,
+  type FeatureEntry,
+  type FeatureStatus,
+} from "./feature-index.js";
+
+// NOTE: estimateTokens is intentionally NOT re-exported here — the
+// canonical `estimateTokens` lives in context-manager.ts and is already
+// re-exported by packages/core/src/index.ts.
+// PermissionMode and LaunchHistoryEntry are also owned by other modules
+// (agent/ and history.ts respectively); import from ./runtime.js directly
+// if you need the knowledge-layer-specific helpers.
+export {
+  buildSystemPrompt,
+  type KnowledgeContext,
+} from "./runtime.js";
diff --git a/packages/core/src/knowledge/runtime.ts b/packages/core/src/knowledge/runtime.ts
new file mode 100644
index 0000000..2295e50
--- /dev/null
+++ b/packages/core/src/knowledge/runtime.ts
@@ -0,0 +1,215 @@
+/**
+ * Runtime composition of the ARC assistant system prompt. Blends static
+ * domain knowledge (architecture, concepts, commands) with a live snapshot
+ * of user state (active profile, recent launches, doctor issues, version)
+ * to produce one structured prompt string under ~4000 tokens.
+ */
+
+import type { ArcConfig, Profile } from "../types.js";
+import { ARC_KNOWLEDGE } from "./static.js";
+import { FEATURES_INDEX } from "./feature-index.js";
+
+/** One launch record, rendered into the prompt's "Live state" section. */
+export interface LaunchHistoryEntry {
+  profile: string;
+  tool?: string;
+  mode?: "default" | "bare" | "native" | "worker";
+  startedAt: string;
+  exitCode?: number;
+  durationMs?: number;
+}
+
+export type PermissionMode = "read-only" | "supervised" | "autonomous";
+
+/** Everything `buildSystemPrompt` needs besides the static knowledge payload. */
+export interface KnowledgeContext {
+  config: ArcConfig;
+  recentLaunches: LaunchHistoryEntry[];
+  activeProfile: Profile | null;
+  arcVersion: string;
+  doctorIssues?: string[];
+  permissionMode: PermissionMode;
+  toolCategories: string[];
+}
+
+/** Rough token estimator — 4 chars per token. */
+export function estimateTokens(text: string): number {
+  if (!text) return 0;
+  return Math.ceil(text.length / 4);
+}
+
+const TOKEN_BUDGET = 4000;
+const MAX_RECENT_LAUNCHES = 3;
+const MAX_DOCTOR_ISSUES = 8;
+const MAX_CONCEPTS = 14;
+const MAX_COMMANDS_IN_PROMPT = 32;
+
+/**
+ * Shorten `text` to at most `maxChars` characters, ending in "..." when cut.
+ * A budget too small to hold the ellipsis (under 3 chars) yields a plain
+ * prefix instead, so the result never exceeds `maxChars`.
+ */
+function truncate(text: string, maxChars: number): string {
+  if (text.length <= maxChars) return text;
+  if (maxChars < 3) return text.slice(0, Math.max(0, maxChars));
+  return text.slice(0, maxChars - 3) + "...";
+}
+
+/**
+ * Render one launch record as a single bullet line: start time and profile,
+ * then tool, mode, exit code, and duration (rounded seconds) when present.
+ */
+function formatLaunch(l: LaunchHistoryEntry): string {
+  const mode = l.mode ? ` [${l.mode}]` : "";
+  const tool = l.tool ? ` (${l.tool})` : "";
+  const code =
+    typeof l.exitCode === "number" ? ` exit=${l.exitCode}` : "";
+  const dur =
+    typeof l.durationMs === "number" ? ` ${Math.round(l.durationMs / 1000)}s` : "";
+  return ` - ${l.startedAt} ${l.profile}${tool}${mode}${code}${dur}`;
+}
+
+/** Build a `## title` heading followed by `body`, trimming outer whitespace. */
+function section(title: string, body: string): string {
+  return `## ${title}\n${body}`.trim();
+}
+
+/**
+ * Compose the full assistant system prompt from static knowledge + live
+ * context. Output is structured in 6 sections. Hard-capped to stay within
+ * a ~4000-token budget; long inputs (recentLaunches, doctorIssues) are
+ * truncated, and overall output is clamped as a final safety.
+ *
+ * When the clamp triggers, the sections ahead of "Behavior rules" are
+ * shortened first so the permission-mode rules are not the part that
+ * gets cut off.
+ */
+export function buildSystemPrompt(ctx: KnowledgeContext): string {
+  // 1. Identity
+  const identity = section(
+    "Identity",
+    [
+      "You are the ARC assistant — an in-app helper embedded in ARC",
+      "(Agent Runtime Control). You understand ARC's profile model,",
+      "adapters, hook pipeline, shared layer, and dashboard. You help",
+      "the user inspect, configure, launch, and orchestrate agent",
+      "runtimes through ARC's commands and tools.",
+    ].join(" "),
+  );
+
+  // 2. Capabilities
+  const caps = ctx.toolCategories.length
+    ? ctx.toolCategories.map((c) => `- ${c}`).join("\n")
+    : "- (no tools available in this session)";
+  const capabilities = section(
+    "Capabilities",
+    `You have access to tools in the following categories (schemas provided separately):\n${caps}`,
+  );
+
+  // 3. Architecture brief
+  const arch = truncate(ARC_KNOWLEDGE.architecture, 1400);
+  const architecture = section("ARC architecture", arch);
+
+  // 4. Concepts glossary (trimmed)
+  const conceptKeys = Object.keys(ARC_KNOWLEDGE.concepts).slice(0, MAX_CONCEPTS);
+  const glossary = conceptKeys
+    .map((k) => `- **${k}**: ${ARC_KNOWLEDGE.concepts[k]}`)
+    .join("\n");
+  const concepts = section("Key concepts", glossary);
+
+  // 5. Live state snapshot
+  const profileCount = Object.keys(ctx.config.profiles).length;
+  const activeName = ctx.config.activeProfile || "(none)";
+  const activeTool = ctx.activeProfile?.tool ?? "unknown";
+  const activeAuth = ctx.activeProfile?.authType ?? "unknown";
+  const sharedOn = ctx.activeProfile?.useShared ? "yes" : "no";
+  const enforcement = ctx.activeProfile?.enforcement ?? "log";
+
+  const launches = ctx.recentLaunches
+    .slice(0, MAX_RECENT_LAUNCHES)
+    .map(formatLaunch)
+    .join("\n");
+
+  const issues = (ctx.doctorIssues ?? []).slice(0, MAX_DOCTOR_ISSUES);
+  const issuesBlock = issues.length
+    ? issues.map((i) => ` - ${truncate(i, 160)}`).join("\n")
+    : " - none";
+
+  const shippedFeatureCount = FEATURES_INDEX.filter(
+    (f) => f.status === "shipped",
+  ).length;
+
+  const liveState = section(
+    "Live state",
+    [
+      `- ARC version: ${ctx.arcVersion}`,
+      `- Permission mode: ${ctx.permissionMode}`,
+      `- Active profile: ${activeName} (tool=${activeTool}, auth=${activeAuth}, shared=${sharedOn}, enforcement=${enforcement})`,
+      `- Total profiles: ${profileCount}`,
+      `- Shipped features indexed: ${shippedFeatureCount}`,
+      `- Recent launches (last ${MAX_RECENT_LAUNCHES}):`,
+      launches || " - none",
+      `- Doctor issues:`,
+      issuesBlock,
+    ].join("\n"),
+  );
+
+  // 6. Behavior rules
+  const modeRules: Record<PermissionMode, string> = {
+    "read-only": [
+      "You are in read-only mode. You may inspect ARC state but you MUST NOT",
+      "call tools that modify profiles, credentials, or run subprocesses.",
+      "If the user asks for a destructive action, explain what command",
+      "they would run instead and stop.",
+    ].join(" "),
+    supervised: [
+      "You are in supervised mode. For destructive actions (profile delete,",
+      "credential swap, backup restore, shared push/pull, factory abort) you",
+      "MUST state your plan and wait for user confirmation before invoking",
+      "the tool. Non-destructive reads may proceed without confirmation.",
+    ].join(" "),
+    autonomous: [
+      "You are in autonomous mode. You may invoke tools without per-action",
+      "confirmation, but still narrate what you are doing, and stop",
+      "immediately if a tool returns an error or unexpected state.",
+    ].join(" "),
+  };
+
+  const behavior = section(
+    "Behavior rules",
+    [
+      "- Prefer tools over speculation. If the user asks about their state,",
+      " read it through the provided tools rather than guessing.",
+      "- Prefer asking one clarifying question over guessing when intent is",
+      " ambiguous about which profile or tool is involved.",
+      "- Cite the exact `arc ...` command when recommending an action, so",
+      " the user can run it outside the assistant if they prefer.",
+      "- Treat `swap`, `backup restore`, `profile delete`, `shared push`,",
+      " `factory abort`, and `uninstall` as destructive.",
+      "- Never invent commands, flags, or concepts. If unsure, say so.",
+      "",
+      modeRules[ctx.permissionMode],
+    ].join("\n"),
+  );
+
+  // Compose. Behavior rules are kept apart from the rest so the clamp
+  // below can shorten the informational sections without dropping them.
+  const preamble = [
+    identity,
+    capabilities,
+    architecture,
+    concepts,
+    liveState,
+  ].join("\n\n");
+  const separator = "\n\n";
+
+  // Final safety clamp. Aim for 2500-3000 tokens; hard cap at 4000.
+  // Under budget this is a no-op and the output is all six sections
+  // joined by blank lines.
+  const maxChars = TOKEN_BUDGET * 4;
+  const preambleBudget = maxChars - separator.length - behavior.length;
+  const prompt = truncate(preamble, preambleBudget) + separator + behavior;
+
+  // Only reachable if the behavior section alone overflows the budget.
+  return truncate(prompt, maxChars);
+}
diff --git a/packages/core/src/knowledge/static.ts b/packages/core/src/knowledge/static.ts
new file mode 100644
index 0000000..79268ac
--- /dev/null
+++ b/packages/core/src/knowledge/static.ts
@@ -0,0 +1,485 @@
+/**
+ * Static ARC knowledge: purpose, architecture, concept glossary, and command
+ * catalog. This payload is composed into the assistant system prompt by
+ * buildSystemPrompt() at runtime.
+ */ + +export type CommandCategory = + | "profile" + | "launch" + | "diagnostic" + | "orchestration" + | "data" + | "utility"; + +export interface CommandDoc { + name: string; + description: string; + examples?: string[]; + category: CommandCategory; +} + +export interface ArcKnowledge { + purpose: string; + architecture: string; + concepts: Record; + commands: CommandDoc[]; + docLinks: Record; +} + +/** + * Curated catalog of ARC CLI commands. Not auto-generated; kept in sync with + * packages/cli/src/cli.ts by hand. Covers the most common commands + key + * subcommands. Not exhaustive (~45 entries). + */ +export const COMMANDS_CATALOG: CommandDoc[] = [ + // ─── Profile management ───────────────────────────────────────────── + { + name: "profile list", + description: "List all registered profiles with tool, auth type, and active marker.", + category: "profile", + examples: ["arc profile list", "arc ls"], + }, + { + name: "profile show", + description: "Show full profile record (env overrides, hooks, shared layer state).", + category: "profile", + examples: ["arc profile show", "arc profile show work"], + }, + { + name: "profile switch", + description: "Set the active profile. Subsequent launches use its config dir and env.", + category: "profile", + examples: ["arc profile switch work", "arc use work"], + }, + { + name: "profile create", + description: "Create a new profile. Prompts for tool binary, auth type, and description.", + category: "profile", + examples: [ + "arc profile create work", + "arc create work --tool claude --auth-type oauth", + ], + }, + { + name: "profile clone", + description: "Copy an existing profile's config dir into a new profile with a new name.", + category: "profile", + examples: ["arc profile clone work work-staging"], + }, + { + name: "profile delete", + description: "Delete a profile and its isolated config dir. 
Prompts for confirmation.", + category: "profile", + examples: ["arc profile delete old-profile"], + }, + { + name: "profile import", + description: "Interactive import of existing tool installs (~/.claude, ~/.codex, etc.).", + category: "profile", + examples: ["arc profile import"], + }, + { + name: "profile export", + description: "Export profile config dir as a tarball for backup or transfer.", + category: "profile", + examples: ["arc profile export work ./work.tgz"], + }, + { + name: "profile import-file", + description: "Restore a profile from a tarball produced by profile export.", + category: "profile", + examples: ["arc profile import-file ./work.tgz"], + }, + { + name: "profile clear-active", + description: "Unset the active profile without deleting it.", + category: "profile", + examples: ["arc profile clear-active"], + }, + + // ─── Launch ───────────────────────────────────────────────────────── + { + name: "launch", + description: + "Launch the active profile's tool (or a named one). Default mode wraps the tool with ARC hooks.", + category: "launch", + examples: ["arc launch", "arc launch work"], + }, + { + name: "launch --bare", + description: + "Launch the tool binary with the profile's env + configDir but no ARC wrapping or hooks.", + category: "launch", + examples: ["arc launch --bare", "arc launch work --bare"], + }, + { + name: "launch --native", + description: + "Use the tool's native plugin/adapter integration when available (e.g. 
Claude Code plugin).", + category: "launch", + examples: ["arc launch --native"], + }, + { + name: "launch --worker", + description: + "Launch in worker permission mode (restricted allowlist, non-interactive).", + category: "launch", + examples: ["arc launch --worker"], + }, + { + name: "run", + description: "Run a one-shot prompt against the active profile's tool and print the result.", + category: "launch", + examples: ['arc run "summarize this repo"'], + }, + { + name: "exec", + description: "Run a shell command with the profile's env vars applied (configDir, tool env).", + category: "launch", + examples: ["arc exec -- claude --version"], + }, + { + name: "shell", + description: "Open a subshell with the profile's env vars applied.", + category: "launch", + examples: ["arc shell"], + }, + + // ─── Diagnostic ───────────────────────────────────────────────────── + { + name: "doctor", + description: + "Run diagnostic checks (binary on PATH, auth present, config dir writable). Prints repair hints.", + category: "diagnostic", + examples: ["arc doctor"], + }, + { + name: "health", + description: "Structured JSON health report — used by the TUI and dashboard.", + category: "diagnostic", + examples: ["arc health"], + }, + { + name: "logs", + description: "Tail or show the ARC activity log (~/.arc/activity.log).", + category: "diagnostic", + examples: ["arc logs", "arc logs --tail 50"], + }, + { + name: "which", + description: "Print the active profile's resolved config dir and tool binary path.", + category: "diagnostic", + examples: ["arc which"], + }, + + // ─── Orchestration ────────────────────────────────────────────────── + { + name: "tasks list", + description: "List persisted orchestration tasks (delegated, scheduled, completed).", + category: "orchestration", + examples: ["arc tasks list"], + }, + { + name: "tasks create", + description: "Create a new task with description, priority, and optional assignee.", + category: "orchestration", + examples: ['arc tasks 
create "refactor auth flow"'], + }, + { + name: "tasks stop", + description: "Cancel a running task.", + category: "orchestration", + examples: ["arc tasks stop t-42"], + }, + { + name: "sessions list", + description: "List session threads (running, paused, completed).", + category: "orchestration", + examples: ["arc sessions list"], + }, + { + name: "sessions resume", + description: "Resume the most recent session or a specific session id.", + category: "orchestration", + examples: ["arc sessions resume", "arc sessions resume s-12"], + }, + { + name: "factory status", + description: "Show Dark Factory wave status (parallel agent spec execution).", + category: "orchestration", + examples: ["arc factory status"], + }, + { + name: "remote list", + description: "List registered remote agents (MCP/HTTP bridges).", + category: "orchestration", + examples: ["arc remote list"], + }, + + // ─── Data ─────────────────────────────────────────────────────────── + { + name: "backup create", + description: "Create a compressed snapshot of ~/.arc (config + profile dirs).", + category: "data", + examples: ["arc backup create"], + }, + { + name: "backup list", + description: "List available backup snapshots with timestamps and sizes.", + category: "data", + examples: ["arc backup list"], + }, + { + name: "backup restore", + description: "Restore a backup snapshot. 
Prompts before overwriting current state.", + category: "data", + examples: ["arc backup restore 2026-04-17T12-00"], + }, + { + name: "shared status", + description: "Show which profiles are subscribed to the shared layer and what it contains.", + category: "data", + examples: ["arc shared status"], + }, + { + name: "shared pull", + description: + "Pull a profile's MCPs/commands/CLAUDE.md/memory/projects into ~/.arc/shared/.", + category: "data", + examples: ["arc shared pull work"], + }, + { + name: "shared push", + description: "Push (enable) the shared layer onto a profile so it sees shared resources.", + category: "data", + examples: ["arc shared enable work"], + }, + { + name: "memory list", + description: "List persistent memory entries (scoped, typed, scored).", + category: "data", + examples: ["arc memory list"], + }, + { + name: "memory search", + description: "Full-text search memory entries.", + category: "data", + examples: ['arc memory search "oauth refresh"'], + }, + { + name: "skills list", + description: "List loaded skills and their contract metadata.", + category: "data", + examples: ["arc skills list"], + }, + + // ─── Utility ──────────────────────────────────────────────────────── + { + name: "mcp connect", + description: "Add an MCP server to the active profile's config.", + category: "utility", + examples: ["arc mcp connect github https://mcp.github.dev"], + }, + { + name: "mcp list", + description: "List MCP servers configured for the active profile.", + category: "utility", + examples: ["arc mcp list"], + }, + { + name: "swap capture", + description: + "[experimental] Capture the current tool's auth credentials into a named snapshot.", + category: "utility", + examples: ["arc swap capture personal"], + }, + { + name: "swap to", + description: + "[experimental] Swap the live tool credentials to a named snapshot without touching MCPs/settings.", + category: "utility", + examples: ["arc swap to work"], + }, + { + name: "instructions show", + 
description: "Show the agent instructions injected as ARC_AGENT_INSTRUCTIONS at launch.", + category: "utility", + examples: ["arc instructions show"], + }, + { + name: "instructions set", + description: "Set inline agent instructions for the active profile.", + category: "utility", + examples: ['arc instructions set "Always run typecheck before commit"'], + }, + { + name: "instructions edit", + description: "Open the instructions file in $EDITOR.", + category: "utility", + examples: ["arc instructions edit"], + }, + { + name: "instructions clear", + description: "Remove agent instructions from the active profile.", + category: "utility", + examples: ["arc instructions clear"], + }, + { + name: "provider set", + description: + "Configure an OpenAI-compatible provider (baseUrl, model, apiKeyEnvVar) on the active profile.", + category: "utility", + examples: ["arc provider set --preset openrouter"], + }, + { + name: "provider show", + description: "Show the active profile's configured provider.", + category: "utility", + examples: ["arc provider show"], + }, + { + name: "provider clear", + description: "Remove the provider config from the active profile.", + category: "utility", + examples: ["arc provider clear"], + }, + { + name: "provider presets", + description: + "List built-in provider presets (OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, DeepSeek).", + category: "utility", + examples: ["arc provider presets"], + }, + { + name: "dashboard", + description: "Open the ARC web dashboard in the browser.", + category: "utility", + examples: ["arc dashboard"], + }, + { + name: "shell-init", + description: "Print shell init snippet (completions, PROMPT_COMMAND integration).", + category: "utility", + examples: ["arc shell-init bash"], + }, + { + name: "update", + description: "Self-update ARC via npm install -g.", + category: "utility", + examples: ["arc update"], + }, +]; + +export const ARC_KNOWLEDGE: ArcKnowledge = { + purpose: [ + "ARC (Agent Runtime 
Control) is a CLI and TUI for managing multiple agent", + "runtimes — Claude Code, Gemini CLI, Codex CLI, OpenClaw, and any OpenAI", + "compatible tool — side by side on a single machine. Each runtime lives", + "in an isolated profile with its own config directory, credentials, MCP", + "servers, hooks, and environment variables. Switching profiles is a", + "single command; launching a profile rewrites the target tool's HOME-", + "equivalent env vars (CLAUDE_CONFIG_DIR, GEMINI_CLI_HOME, CODEX_HOME,", + "HERMES_HOME) so the tool reads only that profile's state.", + "", + "The problem ARC solves: agent CLIs assume a single global install and", + "a single credential set. Users who need work/personal separation,", + "multiple API keys, per-project MCP sets, or reproducible setups across", + "machines hit painful collisions. ARC removes the collisions by", + "scoping every piece of state to a profile and providing operations —", + "clone, import, export, shared layer, credential hot-swap, backup —", + "that treat profiles as first-class artifacts. On top of that base,", + "ARC adds an orchestration layer (hook pipeline, roundtable, task", + "delegation, risk classification) so the same profiles can be used", + "both for interactive sessions and for supervised multi-agent work.", + ].join(" "), + + architecture: [ + "Monorepo laid out as pnpm workspaces under packages/: core (profile", + "model, hooks, memory, sessions, skills, tasks, telemetry, sync,", + "factory, permissions), cli (Commander.js surface + Ink TUI),", + "adapter-claude, adapter-openclaw, mcp, and dashboard (web UI).", + "", + "Profiles are records in ~/.arc/config.json that point at an isolated", + "config directory under ~/.arc/profiles//. 
At launch time the", + "runtime resolves the profile, rewrites env (HOME-equivalents per", + "tool, plus envOverrides and ARC_AGENT_INSTRUCTIONS), and invokes the", + "right adapter.", + "", + "Adapters implement a common interface (detect, launch, hook into", + "tool output) for each backend: Claude Code (SDK + plugin + hooks),", + "Codex CLI, Gemini CLI, OpenClaw (native plugin), Hermes (MCP bridge),", + "OpenAI-compatible (custom baseUrl/model), Generic (fallback).", + "", + "The hook pipeline is a priority-ordered bus of 8 hooks (source-", + "classify → risk detect → interagent routing → supervision gate →", + "agent execution → post-verify → audit → roundtable). Each profile", + "selects an enforcement mode (off/log/advise/enforce).", + "", + "The shared layer lives under ~/.arc/shared/ and syncs MCP servers,", + "slash commands, CLAUDE.md, memory, and projects across profiles", + "that opt in. Pull copies from a profile into shared; enable links", + "shared into a profile via directory links or merged files.", + "", + "MCP integration: MCP servers are declared per profile and reachable", + "from whichever tool is launched. The dashboard is a React app that", + "reads ~/.arc via a local HTTP bridge and surfaces sessions, traces,", + "risk, tasks, skills, memory, agents, factory, profiles, diagnostics,", + "sync, and plugins.", + "", + "The roundtable module runs multi-agent discussions for a single", + "request, collecting agent outputs and synthesizing a verdict through", + "the hook bus so the orchestration and adapter layers share one path.", + ].join(" "), + + concepts: { + profile: + "A named runtime record (tool, auth type, config dir, env, hooks) stored in ~/.arc/config.json.", + "active profile": + "The profile ARC uses by default for launch/run/exec. 
Set via `arc use` or `arc profile switch`.", + "config dir": + "The isolated directory under ~/.arc/profiles// that holds tool-native config, credentials, and history.", + "bare launch": + "Launch mode that sets the profile env and invokes the tool binary directly — no hooks, no wrapping.", + "launch mode": + "One of: default (hook-wrapped), --bare (no wrapping), --native (tool-native plugin integration), --worker (restricted permissions).", + adapter: + "Per-tool integration module (claude, codex, gemini, openclaw, hermes, openai-compat, generic) implementing detect + launch + hook.", + "shared layer": + "~/.arc/shared/ holding MCP servers, commands, CLAUDE.md, memory, and projects that multiple profiles can opt into.", + "hook pipeline": + "Priority-ordered bus of 8 hooks (source-classify, risk, routing, supervision, exec, post-verify, audit, roundtable) run around every agent turn.", + "enforcement mode": + "Per-profile hook behavior: off (skip), log (observe), advise (inject suggestions), enforce (block on failures).", + roundtable: + "Multi-agent discussion protocol: several agents answer the same prompt, results are synthesized into a single verdict.", + "permission mode": + "read-only | supervised | autonomous — governs whether the assistant may call tools and whether destructive ops need confirmation.", + "risk tier": + "Output of the risk-detection hook: low | medium | high | critical. 
Higher tiers raise supervision requirements.", + "credential hot-swap": + "[experimental] `arc swap` — capture/restore tool auth credentials without touching MCPs, settings, or history.", + "agent instructions": + "Per-profile prompt text resolved at launch and injected as ARC_AGENT_INSTRUCTIONS env var.", + telemetry: + "OpenTelemetry spans around sessions, preflight, hooks, agent execution, tool use, postflight, and circuit breakers.", + factory: + "Dark Factory Mode — parallel wave execution of agent specs for batch/background work.", + "profile inheritance": + "A profile may set `inherits: ` to reuse env, hooks, and config fields unless overridden.", + }, + + commands: COMMANDS_CATALOG, + + docLinks: { + "getting started": "/docs/getting-started", + profiles: "/docs/profiles", + authentication: "/docs/authentication", + configuration: "/docs/configuration", + advanced: "/docs/advanced", + "shell integration": "/docs/shell-integration", + troubleshooting: "/docs/troubleshooting", + development: "/docs/development", + spec: "/docs/spec/SPEC", + }, +}; diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index e5b7865..1708d19 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -72,7 +72,14 @@ export interface ArcSettings { export interface ArcConfig { version: 1; - activeProfile: string; + /** + * Name of the active profile, or null when no profile is active. + * A null active profile means `arc launch` / tool commands with no explicit + * profile argument will fall back to bare mode (native tool launch, no env + * injection). Use `arc profile switch ` or `arc profile clear-active` + * to change this. 
+ */ + activeProfile: string | null; profiles: Record; profileOrder?: string[]; theme?: "dark" | "light"; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 94afae2..61c5a39 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -60,6 +60,9 @@ importers: vitest: specifier: ^4.1.2 version: 4.1.2(@types/node@25.5.2)(vite@8.0.3(@emnapi/core@1.9.1)(@emnapi/runtime@1.9.1)(@types/node@25.5.2)(esbuild@0.27.3)(jiti@2.6.1)(tsx@4.21.0)) + zod: + specifier: ^3.25.0 + version: 3.25.76 optionalDependencies: '@inquirer/prompts': specifier: ^8 @@ -92,7 +95,11 @@ importers: specifier: workspace:* version: link:../mcp - packages/core: {} + packages/core: + dependencies: + zod: + specifier: ^3.25.0 + version: 3.25.76 packages/dashboard: dependencies: diff --git a/site/src/components/Features.tsx b/site/src/components/Features.tsx index 389882e..0d31e5b 100644 --- a/site/src/components/Features.tsx +++ b/site/src/components/Features.tsx @@ -28,7 +28,7 @@ const features: Feature[] = [ label: "ADAPTERS", title: "Multi-Runtime", description: - "Claude Code, Codex CLI, Gemini CLI, OpenClaw, and a generic adapter for anything that speaks MCP or HTTP.", + "Claude Code, Codex CLI, Gemini CLI, OpenClaw, Hermes, and a generic adapter. Native launch for full TUI handoff, worker mode for orchestration, or bare passthrough with no profile.", }, { icon: , diff --git a/tests/integration/profile.test.ts b/tests/integration/profile.test.ts index bb60b99..3a7ebad 100644 --- a/tests/integration/profile.test.ts +++ b/tests/integration/profile.test.ts @@ -34,7 +34,8 @@ describe("Profile CRUD", () => { const { loadConfig } = await import("@axiom-labs/arc-core"); const config = loadConfig(); expect(config.version).toBe(1); - expect(config.activeProfile).toBe("default"); + // New installs start with null activeProfile (bare-mode default). 
+ expect(config.activeProfile).toBeNull(); expect(config.profiles).toEqual({}); }); diff --git a/tests/unit/agent/arc-tools.test.ts b/tests/unit/agent/arc-tools.test.ts new file mode 100644 index 0000000..315d69a --- /dev/null +++ b/tests/unit/agent/arc-tools.test.ts @@ -0,0 +1,298 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { + ToolRegistry, + registerArcTools, + ARC_TOOLS, +} from "../../../packages/core/src/agent/index.js"; +import type { + ToolContext, +} from "../../../packages/core/src/agent/types.js"; +import type { ArcConfig, Profile } from "../../../packages/core/src/types.js"; + +// Don't silence logging — writeLogEvent writes under ARC_DIR which we control. + +function baseProfile(overrides: Partial = {}): Profile { + return { + authType: "oauth", + tool: "claude", + configDir: "/tmp/fake-does-not-exist", + createdAt: "2026-01-01T00:00:00Z", + ...overrides, + }; +} + +function writeConfig(dir: string, cfg: ArcConfig): void { + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(dir, "config.json"), JSON.stringify(cfg, null, 2), "utf-8"); +} + +function makeCtx(mode: "read-only" | "supervised" | "autonomous" = "autonomous"): ToolContext { + return { + mode, + confirm: vi.fn(async () => true), + log: vi.fn(), + }; +} + +let tmpDir: string; +let prevEnv: string | undefined; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "arc-agent-test-")); + prevEnv = process.env["ARC_DIR"]; + process.env["ARC_DIR"] = tmpDir; +}); + +afterEach(() => { + if (prevEnv === undefined) delete process.env["ARC_DIR"]; + else process.env["ARC_DIR"] = prevEnv; + try { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + // ignore + } +}); + +describe("registerArcTools", () => { + it("registers all Phase 2 tools (≥15)", () => { + const registry = new ToolRegistry(); + registerArcTools(registry); + const 
list = registry.list(); + expect(list.length).toBeGreaterThanOrEqual(15); + }); + + it("includes read, write, and dangerous tiers", () => { + const registry = new ToolRegistry(); + registerArcTools(registry); + const tiers = new Set(registry.list().map((t) => t.permission)); + expect(tiers.has("read")).toBe(true); + expect(tiers.has("write")).toBe(true); + expect(tiers.has("dangerous")).toBe(true); + }); + + it("read-only mode hides write/dangerous tools", () => { + const registry = new ToolRegistry(); + registerArcTools(registry); + const schemas = registry.getSchemas("read-only"); + for (const s of schemas) { + expect(s.permission).toBe("read"); + } + }); +}); + +describe("list_profiles tool", () => { + it("returns the profiles from the temp config", async () => { + const cfg: ArcConfig = { + version: 1, + activeProfile: "alpha", + profiles: { + alpha: baseProfile({ description: "the alpha profile" }), + beta: baseProfile({ tool: "gemini" }), + }, + }; + writeConfig(tmpDir, cfg); + + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute("list_profiles", {}, makeCtx()); + expect(result.ok).toBe(true); + if (result.ok) { + const profiles = result.output as Array<{ name: string }>; + const names = profiles.map((p) => p.name).sort(); + expect(names).toEqual(["alpha", "beta"]); + } + }); + + it("returns empty when no config exists yet", async () => { + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute("list_profiles", {}, makeCtx()); + expect(result.ok).toBe(true); + if (result.ok) expect(result.output).toEqual([]); + }); +}); + +describe("show_profile tool", () => { + it("returns the profile record", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { alpha: baseProfile({ description: "desc" }) }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await 
registry.execute("show_profile", { name: "alpha" }, makeCtx()); + expect(result.ok).toBe(true); + if (result.ok) { + const prof = result.output as { name: string; description?: string }; + expect(prof.name).toBe("alpha"); + expect(prof.description).toBe("desc"); + } + }); + + it("errors when profile is missing", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { alpha: baseProfile() }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute("show_profile", { name: "ghost" }, makeCtx()); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.error).toMatch(/not found/); + }); +}); + +describe("get_active_profile tool", () => { + it("returns active profile record", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { alpha: baseProfile() }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute("get_active_profile", {}, makeCtx()); + expect(result.ok).toBe(true); + if (result.ok) { + const prof = result.output as { name: string } | null; + expect(prof?.name).toBe("alpha"); + } + }); +}); + +describe("switch_active_profile tool", () => { + it("writes the new active profile to config", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { + alpha: baseProfile(), + beta: baseProfile(), + }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute( + "switch_active_profile", + { name: "beta" }, + makeCtx("autonomous"), + ); + expect(result.ok).toBe(true); + const written = JSON.parse( + fs.readFileSync(path.join(tmpDir, "config.json"), "utf-8"), + ) as ArcConfig; + expect(written.activeProfile).toBe("beta"); + }); + + it("is blocked in read-only mode", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { alpha: baseProfile(), beta: 
baseProfile() }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute( + "switch_active_profile", + { name: "beta" }, + makeCtx("read-only"), + ); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.blocked).toBe(true); + }); +}); + +describe("set_profile_flags tool", () => { + it("replaces launchArgs array", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { alpha: baseProfile() }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute( + "set_profile_flags", + { profileName: "alpha", flags: ["--verbose", "--no-color"] }, + makeCtx("autonomous"), + ); + expect(result.ok).toBe(true); + const written = JSON.parse( + fs.readFileSync(path.join(tmpDir, "config.json"), "utf-8"), + ) as ArcConfig; + expect(written.profiles["alpha"].launchArgs).toEqual(["--verbose", "--no-color"]); + }); +}); + +describe("delete_profile tool", () => { + it("removes the profile from config (dangerous tier)", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { + alpha: baseProfile(), + beta: baseProfile(), + }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute( + "delete_profile", + { name: "beta" }, + makeCtx("autonomous"), + ); + expect(result.ok).toBe(true); + const written = JSON.parse( + fs.readFileSync(path.join(tmpDir, "config.json"), "utf-8"), + ) as ArcConfig; + expect(written.profiles["beta"]).toBeUndefined(); + expect(written.profiles["alpha"]).toBeDefined(); + }); + + it("requires confirmation in supervised mode", async () => { + writeConfig(tmpDir, { + version: 1, + activeProfile: "alpha", + profiles: { alpha: baseProfile(), beta: baseProfile() }, + }); + const registry = new ToolRegistry(); + registerArcTools(registry); + const confirm = vi.fn(async () => false); + const result = await 
registry.execute( + "delete_profile", + { name: "beta" }, + { mode: "supervised", confirm, log: () => {} }, + ); + expect(confirm).toHaveBeenCalledOnce(); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.blocked).toBe(true); + }); +}); + +describe("get_arc_version tool", () => { + it("returns a version string", async () => { + const registry = new ToolRegistry(); + registerArcTools(registry); + const result = await registry.execute("get_arc_version", {}, makeCtx()); + expect(result.ok).toBe(true); + if (result.ok) { + const out = result.output as { version: string }; + expect(typeof out.version).toBe("string"); + } + }); +}); + +describe("ARC_TOOLS catalog", () => { + it("exposes every registered tool by key", () => { + const registry = new ToolRegistry(); + registerArcTools(registry); + for (const [key, tool] of Object.entries(ARC_TOOLS)) { + expect(registry.get(key)?.name).toBe(tool.name); + } + }); +}); diff --git a/tests/unit/agent/loop.test.ts b/tests/unit/agent/loop.test.ts new file mode 100644 index 0000000..c92b81f --- /dev/null +++ b/tests/unit/agent/loop.test.ts @@ -0,0 +1,156 @@ +import { describe, it, expect, vi } from "vitest"; +import { z } from "zod"; +import { ToolRegistry } from "../../../packages/core/src/agent/registry.js"; +import { runAgent } from "../../../packages/core/src/agent/loop.js"; +import type { + AgentEvent, + Tool, + ToolContext, +} from "../../../packages/core/src/agent/types.js"; +import type { + AgentChunk, + AgentClient, +} from "../../../packages/core/src/agent-client/index.js"; + +vi.mock("../../../packages/core/src/logging.js", () => ({ + writeLogEvent: vi.fn(), +})); + +// A tiny mock client that replays a scripted AgentChunk stream. 
+function mockClient(chunks: AgentChunk[]): AgentClient { + return { + async *send() { + for (const c of chunks) yield c; + }, + async shutdown() {}, + }; +} + +const echoTool: Tool<{ value: string }, { echoed: string }> = { + name: "echo", + description: "return the input value", + permission: "read", + schema: z.object({ value: z.string() }), + handler: async ({ value }) => ({ echoed: value }), +}; + +function makeCtx(): ToolContext { + return { + mode: "supervised", + confirm: vi.fn(async () => true), + log: vi.fn(), + }; +} + +async function collect(it: AsyncIterable): Promise { + const out: AgentEvent[] = []; + for await (const ev of it) out.push(ev); + return out; +} + +describe("runAgent loop", () => { + it("emits text chunks and terminates on done", async () => { + const registry = new ToolRegistry(); + const client = mockClient([ + { type: "text", content: "hello" }, + { type: "text", content: " world" }, + { type: "done", reason: "end_turn" }, + ]); + const events = await collect( + runAgent({ client, registry, ctx: makeCtx() }, "say hi"), + ); + expect(events.map((e) => e.type)).toEqual(["text", "text", "done"]); + const done = events.at(-1); + if (done?.type === "done") expect(done.reason).toBe("end_turn"); + }); + + it("dispatches tool_call through registry and emits tool_result", async () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + const client = mockClient([ + { type: "tool_call", id: "c1", tool: "echo", input: { value: "hi" } }, + { type: "done", reason: "end_turn" }, + ]); + const events = await collect(runAgent({ client, registry, ctx: makeCtx() }, "do it")); + + const call = events.find((e) => e.type === "tool_call"); + const result = events.find((e) => e.type === "tool_result"); + expect(call).toBeDefined(); + expect(result).toBeDefined(); + if (result?.type === "tool_result") { + expect(result.tool).toBe("echo"); + expect(result.result.ok).toBe(true); + if (result.result.ok) { + 
expect(result.result.output).toEqual({ echoed: "hi" }); + } + } + }); + + it("surfaces unknown-tool errors via tool_result", async () => { + const registry = new ToolRegistry(); + const client = mockClient([ + { type: "tool_call", id: "c1", tool: "missing", input: {} }, + { type: "done", reason: "end_turn" }, + ]); + const events = await collect(runAgent({ client, registry, ctx: makeCtx() }, "?")); + const result = events.find((e) => e.type === "tool_result"); + if (result?.type === "tool_result") { + expect(result.result.ok).toBe(false); + } + }); + + it("enforces maxTurns cap", async () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + const client = mockClient([ + { type: "tool_call", id: "c1", tool: "echo", input: { value: "a" } }, + { type: "tool_call", id: "c2", tool: "echo", input: { value: "b" } }, + { type: "tool_call", id: "c3", tool: "echo", input: { value: "c" } }, + { type: "done", reason: "end_turn" }, + ]); + const events = await collect( + runAgent({ client, registry, ctx: makeCtx(), maxTurns: 1 }, "loop"), + ); + const done = events.at(-1); + if (done?.type === "done") expect(done.reason).toBe("max_turns"); + // First call completes, second trips the cap and emits blocked result + done. 
+ const results = events.filter((e) => e.type === "tool_result"); + expect(results.length).toBe(2); + }); + + it("emits synthetic done:stop when stream ends without explicit done", async () => { + const registry = new ToolRegistry(); + const client = mockClient([{ type: "text", content: "partial" }]); + const events = await collect(runAgent({ client, registry, ctx: makeCtx() }, "...")); + const done = events.at(-1); + expect(done?.type).toBe("done"); + if (done?.type === "done") expect(done.reason).toBe("stop"); + }); + + it("handles client.send throwing synchronously", async () => { + const registry = new ToolRegistry(); + const client: AgentClient = { + send: () => { + throw new Error("boom"); + }, + shutdown: async () => {}, + }; + const events = await collect(runAgent({ client, registry, ctx: makeCtx() }, "go")); + const err = events.find((e) => e.type === "error"); + expect(err).toBeDefined(); + if (err?.type === "error") expect(err.message).toMatch(/boom/); + const done = events.at(-1); + if (done?.type === "done") expect(done.reason).toBe("error"); + }); + + it("propagates thinking chunks", async () => { + const registry = new ToolRegistry(); + const client = mockClient([ + { type: "thinking", content: "hmm" }, + { type: "text", content: "ok" }, + { type: "done", reason: "end_turn" }, + ]); + const events = await collect(runAgent({ client, registry, ctx: makeCtx() }, "?")); + expect(events.some((e) => e.type === "thinking")).toBe(true); + }); +}); diff --git a/tests/unit/agent/permissions.test.ts b/tests/unit/agent/permissions.test.ts new file mode 100644 index 0000000..ff21af9 --- /dev/null +++ b/tests/unit/agent/permissions.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect } from "vitest"; +import { z } from "zod"; +import { + canUseTool, + needsConfirmation, +} from "../../../packages/core/src/agent/permissions.js"; +import type { + PermissionMode, + Tool, + ToolPermission, +} from "../../../packages/core/src/agent/types.js"; + +function 
mkTool(permission: ToolPermission): Tool { + return { + name: `t-${permission}`, + description: `tool ${permission}`, + permission, + schema: z.object({}), + handler: async () => ({}), + }; +} + +const modes: PermissionMode[] = ["read-only", "supervised", "autonomous"]; +const perms: ToolPermission[] = ["read", "write", "dangerous"]; + +describe("canUseTool", () => { + it("read-only blocks write and dangerous, allows read", () => { + expect(canUseTool(mkTool("read"), "read-only")).toBe(true); + expect(canUseTool(mkTool("write"), "read-only")).toBe(false); + expect(canUseTool(mkTool("dangerous"), "read-only")).toBe(false); + }); + + it("supervised allows everything", () => { + for (const p of perms) { + expect(canUseTool(mkTool(p), "supervised")).toBe(true); + } + }); + + it("autonomous allows everything", () => { + for (const p of perms) { + expect(canUseTool(mkTool(p), "autonomous")).toBe(true); + } + }); +}); + +describe("needsConfirmation", () => { + it("only supervised + write/dangerous triggers confirmation", () => { + for (const mode of modes) { + for (const p of perms) { + const expected = mode === "supervised" && (p === "write" || p === "dangerous"); + expect(needsConfirmation(mkTool(p), mode)).toBe(expected); + } + } + }); + + it("read tools never need confirmation", () => { + for (const mode of modes) { + expect(needsConfirmation(mkTool("read"), mode)).toBe(false); + } + }); + + it("autonomous never needs confirmation (pre-trusted)", () => { + for (const p of perms) { + expect(needsConfirmation(mkTool(p), "autonomous")).toBe(false); + } + }); + + it("read-only never needs confirmation (write tools are hidden)", () => { + for (const p of perms) { + expect(needsConfirmation(mkTool(p), "read-only")).toBe(false); + } + }); +}); diff --git a/tests/unit/agent/registry.test.ts b/tests/unit/agent/registry.test.ts new file mode 100644 index 0000000..156cdea --- /dev/null +++ b/tests/unit/agent/registry.test.ts @@ -0,0 +1,208 @@ +import { describe, it, expect, vi, 
beforeEach } from "vitest"; +import { z } from "zod"; +import { ToolRegistry } from "../../../packages/core/src/agent/registry.js"; +import type { + PermissionMode, + Tool, + ToolContext, +} from "../../../packages/core/src/agent/types.js"; + +vi.mock("../../../packages/core/src/logging.js", () => ({ + writeLogEvent: vi.fn(), +})); + +function makeCtx( + mode: PermissionMode, + overrides: Partial = {}, +): ToolContext { + return { + mode, + confirm: vi.fn(async () => true), + log: vi.fn(), + ...overrides, + }; +} + +function readTool(name = "reader"): Tool<{ q?: string }, { echoed: string }> { + return { + name, + description: `echo read tool (${name})`, + permission: "read", + schema: z.object({ q: z.string().optional() }), + handler: async (input) => ({ echoed: input.q ?? "" }), + }; +} + +function writeTool(name = "writer"): Tool<{ value: string }, { written: string }> { + return { + name, + description: `mutates config (${name})`, + permission: "write", + schema: z.object({ value: z.string() }), + handler: async (input) => ({ written: input.value }), + }; +} + +function dangerousTool(name = "bomb"): Tool<{ target: string }, { nuked: string }> { + return { + name, + description: `destroys ${name}`, + permission: "dangerous", + schema: z.object({ target: z.string() }), + handler: async (input) => ({ nuked: input.target }), + }; +} + +describe("ToolRegistry — registration", () => { + let registry: ToolRegistry; + + beforeEach(() => { + registry = new ToolRegistry(); + }); + + it("registers and retrieves tools", () => { + const tool = readTool(); + registry.register(tool); + expect(registry.has("reader")).toBe(true); + expect(registry.get("reader")).toBe(tool); + }); + + it("throws on duplicate registration", () => { + registry.register(readTool()); + expect(() => registry.register(readTool())).toThrow(/already registered/); + }); + + it("list() returns all tools or filtered subset", () => { + registry.register(readTool("r1")); + registry.register(writeTool("w1")); 
+ expect(registry.list()).toHaveLength(2); + expect(registry.list((t) => t.permission === "read")).toHaveLength(1); + }); +}); + +describe("ToolRegistry — getSchemas (mode filtering)", () => { + let registry: ToolRegistry; + + beforeEach(() => { + registry = new ToolRegistry(); + registry.register(readTool("r1")); + registry.register(writeTool("w1")); + registry.register(dangerousTool("d1")); + }); + + it("read-only mode exposes only read tools", () => { + const schemas = registry.getSchemas("read-only"); + const names = schemas.map((s) => s.name); + expect(names).toEqual(["r1"]); + }); + + it("supervised mode exposes all tools", () => { + const schemas = registry.getSchemas("supervised"); + const names = schemas.map((s) => s.name).sort(); + expect(names).toEqual(["d1", "r1", "w1"]); + }); + + it("autonomous mode exposes all tools", () => { + const schemas = registry.getSchemas("autonomous"); + expect(schemas).toHaveLength(3); + }); + + it("derives JSON schema shape from zod", () => { + const schemas = registry.getSchemas("supervised"); + const readerSchema = schemas.find((s) => s.name === "r1"); + expect(readerSchema?.inputSchema).toEqual({ + type: "object", + properties: { q: { type: "string" } }, + // `q` is optional so no `required` key. 
+ }); + const writerSchema = schemas.find((s) => s.name === "w1"); + expect(writerSchema?.inputSchema).toMatchObject({ + type: "object", + properties: { value: { type: "string" } }, + required: ["value"], + }); + }); +}); + +describe("ToolRegistry — execute", () => { + let registry: ToolRegistry; + + beforeEach(() => { + registry = new ToolRegistry(); + }); + + it("returns an error result for unknown tools", async () => { + const result = await registry.execute("nope", {}, makeCtx("supervised")); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.error).toMatch(/Unknown tool/); + }); + + it("validates input against zod schema", async () => { + registry.register(writeTool()); + const result = await registry.execute( + "writer", + { value: 42 }, // wrong type + makeCtx("autonomous"), + ); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.error).toMatch(/Invalid input/); + }); + + it("blocks write tools under read-only mode", async () => { + registry.register(writeTool()); + const result = await registry.execute( + "writer", + { value: "x" }, + makeCtx("read-only"), + ); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.blocked).toBe(true); + expect(result.error).toMatch(/read-only/); + } + }); + + it("calls confirm for write tools under supervised mode", async () => { + registry.register(writeTool()); + const confirm = vi.fn(async () => true); + const ctx = makeCtx("supervised", { confirm }); + const result = await registry.execute("writer", { value: "hi" }, ctx); + expect(confirm).toHaveBeenCalledTimes(1); + expect(result.ok).toBe(true); + if (result.ok) expect(result.output).toEqual({ written: "hi" }); + }); + + it("returns blocked when confirmation is declined", async () => { + registry.register(writeTool()); + const confirm = vi.fn(async () => false); + const ctx = makeCtx("supervised", { confirm }); + const result = await registry.execute("writer", { value: "hi" }, ctx); + expect(result.ok).toBe(false); + if 
(!result.ok) expect(result.blocked).toBe(true); + }); + + it("skips confirmation in autonomous mode", async () => { + registry.register(dangerousTool()); + const confirm = vi.fn(async () => true); + const ctx = makeCtx("autonomous", { confirm }); + const result = await registry.execute("bomb", { target: "x" }, ctx); + expect(confirm).not.toHaveBeenCalled(); + expect(result.ok).toBe(true); + }); + + it("catches handler errors and returns ok:false", async () => { + const broken: Tool<{ x: string }, never> = { + name: "broken", + description: "throws", + permission: "read", + schema: z.object({ x: z.string() }), + handler: async () => { + throw new Error("kaboom"); + }, + }; + registry.register(broken); + const result = await registry.execute("broken", { x: "y" }, makeCtx("autonomous")); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.error).toBe("kaboom"); + }); +}); diff --git a/tests/unit/cli-auth.test.ts b/tests/unit/cli-auth.test.ts index ba4a0a2..a688432 100644 --- a/tests/unit/cli-auth.test.ts +++ b/tests/unit/cli-auth.test.ts @@ -52,7 +52,8 @@ describe("CLI auth: config and profile operations", () => { it("returns default config when no config.json exists", () => { const config = loadConfig(); expect(config.version).toBe(1); - expect(config.activeProfile).toBe("default"); + // New installs start with null activeProfile (bare-mode default). 
+ expect(config.activeProfile).toBeNull(); expect(Object.keys(config.profiles)).toHaveLength(0); }); @@ -160,15 +161,15 @@ describe("CLI auth: config and profile operations", () => { ); const config = loadConfig(); expect(config.activeProfile).toBe("main"); - expect(config.profiles[config.activeProfile]).toBeDefined(); - expect(config.profiles[config.activeProfile].tool).toBe("claude"); + const activeKey = config.activeProfile as string; + expect(config.profiles[activeKey]).toBeDefined(); + expect(config.profiles[activeKey].tool).toBe("claude"); }); it("active profile may not exist in profiles map", () => { - // Default config has activeProfile "default" but no profile entry + // Default config now has activeProfile null (bare mode); no entry. const config = loadConfig(); - expect(config.activeProfile).toBe("default"); - expect(config.profiles[config.activeProfile]).toBeUndefined(); + expect(config.activeProfile).toBeNull(); }); }); @@ -188,7 +189,8 @@ describe("CLI auth: config and profile operations", () => { "dev", ); const config = loadConfig(); - const activeProfile = config.profiles[config.activeProfile]; + const activeKey = config.activeProfile as string; + const activeProfile = config.profiles[activeKey]; expect(activeProfile).toBeDefined(); expect(activeProfile.tool).toBe("gemini"); }); diff --git a/tests/unit/knowledge/feature-index.test.ts b/tests/unit/knowledge/feature-index.test.ts new file mode 100644 index 0000000..2a60931 --- /dev/null +++ b/tests/unit/knowledge/feature-index.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect } from "vitest"; +import { + FEATURES_INDEX, + getFeature, + getFeaturesByStatus, +} from "../../../packages/core/src/knowledge/feature-index.js"; + +describe("FEATURES_INDEX", () => { + it("contains the roundtable feature and its core orchestration siblings", () => { + const ids = FEATURES_INDEX.map((f) => f.id); + for (const required of [ + "roundtable", + "shared-layer", + "hook-pipeline", + "telemetry", + 
"profile-management", + "profile-inheritance", + ]) { + expect(ids).toContain(required); + } + }); + + it("entries have unique ids", () => { + const ids = FEATURES_INDEX.map((f) => f.id); + expect(new Set(ids).size).toBe(ids.length); + }); + + it("entries have a valid status", () => { + for (const f of FEATURES_INDEX) { + expect(["shipped", "roadmap", "deferred"]).toContain(f.status); + expect(f.summary.trim().length).toBeGreaterThan(0); + } + }); +}); + +describe("getFeature", () => { + it("finds features by exact id", () => { + const f = getFeature("roundtable"); + expect(f).toBeDefined(); + expect(f?.name).toBe("Roundtable"); + }); + + it("finds features by exact name (case-insensitive)", () => { + const f = getFeature("Shared Layer"); + expect(f?.id).toBe("shared-layer"); + const f2 = getFeature("shared layer"); + expect(f2?.id).toBe("shared-layer"); + }); + + it("does fuzzy matching on normalized id/name", () => { + expect(getFeature("shared_layer")?.id).toBe("shared-layer"); + expect(getFeature("HookPipeline")?.id).toBe("hook-pipeline"); + // partial + expect(getFeature("roundtab")?.id).toBe("roundtable"); + }); + + it("returns undefined for garbage", () => { + expect(getFeature("xyz-not-a-feature-zzz")).toBeUndefined(); + expect(getFeature("")).toBeUndefined(); + }); +}); + +describe("getFeaturesByStatus", () => { + it("returns only shipped features when asked", () => { + const shipped = getFeaturesByStatus("shipped"); + expect(shipped.length).toBeGreaterThan(0); + expect(shipped.every((f) => f.status === "shipped")).toBe(true); + }); + + it("returns roadmap features when asked", () => { + const roadmap = getFeaturesByStatus("roadmap"); + // At least one roadmap entry is seeded (ai-assistant). 
/**
 * Build `n` synthetic launch-history entries for prompt tests.
 *
 * Entries alternate between the "work" and "personal" profiles, start on
 * consecutive days beginning 2026-04-11 at 09:00 UTC, and get 1s longer each.
 *
 * @param n Number of entries to generate (default 3).
 * @returns Entries ordered oldest-first.
 */
function sampleLaunches(n = 3): LaunchHistoryEntry[] {
  return Array.from({ length: n }, (_, i) => ({
    profile: i % 2 === 0 ? "work" : "personal",
    tool: "claude",
    mode: "default" as const,
    // Date.UTC normalises day overflow, so the timestamp stays a valid ISO
    // date for any n (string-concatenating the day broke once i + 1 >= 10).
    // toISOString() always emits milliseconds; strip them to keep the
    // original "…T09:00:00Z" shape.
    startedAt: new Date(Date.UTC(2026, 3, 11 + i, 9))
      .toISOString()
      .replace(".000Z", "Z"),
    exitCode: 0,
    durationMs: 12_000 + i * 1000,
  }));
}
it("lists up to 3 recent launches only", () => {
  // 12 launches on consecutive days 2026-04-01 … 2026-04-12.
  const many = Array.from({ length: 12 }, (_, i) => ({
    profile: "work",
    tool: "claude",
    mode: "default" as const,
    startedAt: `2026-04-${String(i + 1).padStart(2, "0")}T00:00:00Z`,
    exitCode: 0,
    durationMs: 1000,
  }));
  const prompt = buildSystemPrompt(baseContext({ recentLaunches: many }));
  // count launch bullet lines (prefixed with " - ") inside the Live state section
  const liveSection = prompt
    .split("## Live state")[1]!
    .split("## ")[0]!;
  const bulletCount = (liveSection.match(/^ - /gm) || []).length;
  // Expected: 3 launch bullets + 1 doctor "none" bullet; the bound of 5
  // leaves one bullet of slack.
  expect(bulletCount).toBeLessThanOrEqual(5);
  // The first three launches are listed …
  expect(liveSection).toContain("2026-04-01");
  expect(liveSection).toContain("2026-04-02");
  expect(liveSection).toContain("2026-04-03");
  // … and none of the remaining nine are. Checking only the 5th entry would
  // let a 4-launch listing slip through.
  for (const launch of many.slice(3)) {
    expect(liveSection).not.toContain(launch.startedAt.slice(0, 10));
  }
});
it("handles a null active profile gracefully", () => {
  const cfg = sampleConfig();
  // Use a real `null` (the "no active profile" value of ArcConfig), not an
  // empty string, so the test exercises the case its title names.
  cfg.activeProfile = null;
  const prompt = buildSystemPrompt(
    baseContext({ config: cfg, activeProfile: null }),
  );
  // The section must still render, with placeholders for the missing profile.
  expect(prompt).toContain("Active profile:");
  expect(prompt).toContain("tool=unknown");
  expect(prompt).toContain("auth=unknown");
});
it("example commands start with `arc `", () => {
  // Flatten every catalog entry's optional example list, then check each one.
  const allExamples = COMMANDS_CATALOG.flatMap((entry) => entry.examples ?? []);
  allExamples.forEach((example) => {
    expect(example.startsWith("arc ")).toBe(true);
  });
});
/**
 * Build a minimal version-1 ArcConfig for tests.
 *
 * @param profiles Profile registry keyed by profile name.
 * @param activeProfile Name of the active profile, or `null` (the default)
 *   for "no active profile".
 * @returns A config object holding exactly the given values.
 */
function makeConfig(
  profiles: Record<string, Profile>,
  activeProfile: string | null = null
): ArcConfig {
  return { version: 1, activeProfile, profiles };
}
describe("bare launch mode — env contract", () => {
  it("handleBareLaunch source code does not inject profile env vars", async () => {
    // Contract test: the bare-launch implementation must never set any of
    // the profile-specific env vars that the standard launch path injects.
    const src = fs.readFileSync(
      path.resolve(
        __dirname,
        "..",
        "..",
        "packages",
        "cli",
        "src",
        "commands",
        "launch.ts"
      ),
      "utf-8"
    );
    const start = src.indexOf("export async function handleBareLaunch");
    const end = src.indexOf("/** Suggest an install command");
    // Fail clearly if either marker was renamed or moved. An unchecked -1
    // would make slice() return the wrong region (or nearly the whole file)
    // and the assertions below would pass or fail for the wrong reason.
    expect(start).toBeGreaterThanOrEqual(0);
    expect(end).toBeGreaterThan(start);
    const bareFn = src.slice(start, end);
    expect(bareFn.length).toBeGreaterThan(0);
    // Env vars ARC injects in normal launch — must NOT appear in bare path.
    expect(bareFn).not.toContain("CLAUDE_CONFIG_DIR");
    expect(bareFn).not.toContain("GEMINI_CLI_HOME");
    expect(bareFn).not.toContain("CODEX_HOME");
    expect(bareFn).not.toContain("ARC_AGENT_INSTRUCTIONS");
    expect(bareFn).not.toContain("buildProfileEnv");
    // Must still spawn the tool with stdio inherit.
    expect(bareFn).toContain("spawnSync");
    expect(bareFn).toContain('stdio: "inherit"');
  });
});
describe("shouldInferBare — tool-name inference", () => {
  // Every case loads the CLI module lazily through the same dynamic import
  // and pulls out the helper under test.
  const loadShouldInferBare = async () => {
    const launchModule = await import(
      "../../packages/cli/src/commands/launch.js"
    );
    return launchModule.shouldInferBare;
  };

  it("infers bare when name is a known tool AND no profile matches", async () => {
    const infer = await loadShouldInferBare();
    const knownToolCases: Array<[string, string[]]> = [
      ["claude", []],
      ["codex", ["work"]],
      ["gemini", ["dev", "prod"]],
      ["hermes", []],
      ["openclaw", []],
    ];
    for (const [toolName, profileNames] of knownToolCases) {
      expect(infer(toolName, profileNames, false)).toBe(true);
    }
  });

  it("does NOT infer when a profile with the tool-name exists", async () => {
    const infer = await loadShouldInferBare();
    const shadowedCases: Array<[string, string[]]> = [
      ["claude", ["claude"]],
      ["codex", ["codex", "work"]],
    ];
    for (const [toolName, profileNames] of shadowedCases) {
      expect(infer(toolName, profileNames, false)).toBe(false);
    }
  });

  it("does NOT infer for unknown tools (avoids hijacking real profile names)", async () => {
    const infer = await loadShouldInferBare();
    for (const unknownName of ["work", "random-thing"]) {
      expect(infer(unknownName, [], false)).toBe(false);
    }
  });

  it("does NOT infer when no name is given", async () => {
    const infer = await loadShouldInferBare();
    const inferred = infer(undefined, [], false);
    expect(inferred).toBe(false);
  });

  it("explicit bare=true always wins, regardless of name/profile state", async () => {
    const infer = await loadShouldInferBare();
    const explicitCases: Array<[string | undefined, string[]]> = [
      ["work", ["work"]],
      [undefined, []],
      ["anything", []],
    ];
    for (const [maybeName, profileNames] of explicitCases) {
      expect(infer(maybeName, profileNames, true)).toBe(true);
    }
  });
});
a/user-docs/architecture/index.md +++ b/user-docs/architecture/index.md @@ -97,3 +97,26 @@ See [MCP Protocol](/architecture/mcp) for details. All state is stored in `~/.arc/` as JSON files. Credentials use the OS keyring. Traces export to JSONL and optionally OTLP. See [Configuration](/reference/configuration) for the full data layout. + +## Agent Client + Roundtable (internal) + +ARC includes an internal **agent-client** layer that spawns a profile's CLI tool (`claude`, `codex`, `gemini`) with a prompt and captures structured output. This is the substrate for programmatic orchestration — the CLI tool's own tool use, streaming, and MCP integration flow through unchanged. + +Modules: + +| Module | Purpose | +|--------|---------| +| `packages/core/src/agent-client/` | One-shot CLI spawn with per-tool stream parsers and MCP config injection | +| `packages/core/src/agent/` | Tool registry + agent loop for tool-use dispatch | +| `packages/core/src/knowledge/` | Static + runtime system prompt composition (ARC feature knowledge) | +| `packages/core/src/hooks/roundtable.ts` | Multi-agent discussion state machine (turns, roles, synthesis) | + +These modules are internal building blocks — not user-facing yet. The shipped `roundtable` hook drives in-session discussions when the trigger phrases are used; the orchestrator that loops over agents programmatically is tracked separately. + +Coming soon: + +- `arc chat` — terminal REPL over a chosen profile with read-only / supervised / autonomous permission modes +- `arc roundtable --agents a,b,c` — one-shot multi-agent discussion from the CLI +- Dashboard chat panel and roundtable configurator + +See [docs/plans/ai-and-roundtable.md](https://github.com/Codename-11/ARC/blob/master/docs/plans/ai-and-roundtable.md) for the full design. 
diff --git a/user-docs/guide/getting-started.md b/user-docs/guide/getting-started.md index 9df3e0d..a7ae714 100644 --- a/user-docs/guide/getting-started.md +++ b/user-docs/guide/getting-started.md @@ -55,6 +55,24 @@ pnpm install pnpm build ``` +## Your First Launch + +### Fast path — no profile + +If you just want to run a tool through ARC without any configuration: + +```bash +arc run claude +arc run gemini +arc run codex +``` + +`arc run <tool>` is a native passthrough — no profile env, no ARC overlay. Useful if you have ARC installed but haven't set up a profile yet. See [Launch without a profile](/guide/profiles#launch-without-a-profile) for details. + +### Full path — with a profile + +For isolated credentials, the shared layer, hooks, and everything else ARC provides, create a profile. + ## First Profile ### Option 1: Onboarding Wizard diff --git a/user-docs/guide/profiles.md b/user-docs/guide/profiles.md index 0ad26c2..d5c950c 100644 --- a/user-docs/guide/profiles.md +++ b/user-docs/guide/profiles.md @@ -31,6 +31,15 @@ arc use Sets the default profile used by `arc launch` and the shell wrapper. +### Clear the Active Profile + +```bash +arc profile switch none +arc profile clear-active +``` + +Both commands set `activeProfile` to `null` in `~/.arc/config.json`. `arc list` and the TUI render this as `(none)`. While there is no active profile, `arc launch` requires an explicit profile name or an inferred tool (see [Launch without a profile](#launch-without-a-profile) below). + ## Show Details ```bash @@ -145,6 +154,50 @@ Resolution order: 2. Workspace `arc.json` (if present in cwd or parent) 3. Active profile in `~/.arc/config.json` +## Launch Modes + +Each profile launches in one of two modes: + +| Mode | Behavior | +|------|----------| +| `native` (default) | Full TTY handoff — the tool paints its own TUI with statusLine and other chrome. ARC exits cleanly to the child process. 
| +| `worker` | ARC supervises the child process: captures stdout, enables hooks/orchestration, but suppresses the tool's native TUI chrome. Required for roundtable and multi-agent pipelines. | + +Set the mode on the profile or override per launch: + +```bash +arc launch work --native # Force native for this launch +arc launch work --worker # Force worker for this launch +``` + +In the TUI ProfilesView, press `m` on a selected profile to toggle between modes. + +The roundtable orchestrator always forces `worker` mode regardless of the profile setting, because it needs to capture each agent's output. + +::: tip +If you previously set `CLAUDE_CODE_NO_FLICKER=1` to stop Claude's screen from flashing, unset it — `arc doctor` will flag it. Claude's `/tui fullscreen` command is the current mechanism and `native` launch mode gives the tool full control of the terminal. +::: + +## Launch Without a Profile + +You can use ARC-installed tools without any ARC overlay — no env injection, no hook pipeline, no profile config directory: + +```bash +arc run claude # Native passthrough — find claude on PATH and exec +arc run gemini +arc run codex + +arc launch --bare claude # Same, via launch command +``` + +Tool-name inference: `arc launch claude` with no profile named `claude` falls through to bare launch automatically. + +When to use: + +- You have ARC installed but want the tool's stock experience for a session +- You're comparing behavior with and without ARC +- You haven't created a profile yet and want to try the tool first + ## Status ```bash diff --git a/user-docs/reference/configuration.md b/user-docs/reference/configuration.md index eb443f5..7df0e2c 100644 --- a/user-docs/reference/configuration.md +++ b/user-docs/reference/configuration.md @@ -40,6 +40,8 @@ The central configuration file. Contains the profile registry, active profile, and display settings. +`activeProfile` may be either a profile name or `null`. 
When `null`, ARC has no default profile — `arc list` and the TUI render the active slot as `(none)`, and `arc launch` requires an explicit profile name or tool inference (see [Launch without a profile](/guide/profiles#launch-without-a-profile)). + ```json { "version": 1, @@ -92,6 +94,7 @@ The central configuration file. Contains the profile registry, active profile, a | `useSharedMemory` | `boolean?` | Whether `memory/` is linked to `shared/memory/` | | `useSharedProjects` | `boolean?` | Whether `projects/` is linked to `shared/projects/` | | `launchArgs` | `string[]?` | Default flags passed to the agent tool on every launch | +| `launchMode` | `"native" \| "worker"?` | Launch in full TTY handoff (`native`, default) or under ARC supervision (`worker`). See [Launch Modes](/guide/profiles#launch-modes) | | `enforcement` | `"off" \| "log" \| "advise" \| "enforce"?` | Hook enforcement mode for this profile. Defaults to `"log"`. See [Hooks & Supervision](/features/hooks) | | `hooks` | `object?` | Per-hook config overrides (`{ "hook-name": { enabled, timeout } }`). See [Hook Configuration](/features/hooks#configuring-hooks-per-profile) | From aaea8511e0c4738ee1b8380a957a5a2de9a8cb86 Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sat, 18 Apr 2026 09:28:58 -0400 Subject: [PATCH 08/17] docs: Mark Phase 0.7, 2, 3 complete in plan Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/plans/ai-and-roundtable.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/plans/ai-and-roundtable.md b/docs/plans/ai-and-roundtable.md index 96d0363..39b0e84 100644 --- a/docs/plans/ai-and-roundtable.md +++ b/docs/plans/ai-and-roundtable.md @@ -399,11 +399,14 @@ Update checkboxes in-place as phases complete. Add a `Completed YYYY-MM-DD` mark ### Phase 1 — Agent client (CLI-spawn) foundation - [x] **Completed 2026-04-18** — commit `6ff876b`. 
`packages/core/src/agent-client/` with Claude/Codex/Gemini clients, MCP injection per mcpMode variant, stream parsers, 48 unit tests. Unverified CLI flags flagged for Phase 4 smoke test. +### Phase 0.7 — Bare launch + clearable profile +- [x] **Completed 2026-04-18** — commit `443a78c`. `activeProfile: null` valid, `arc run <tool>`, `arc launch --bare`, tool-name inference, `arc profile switch none` / `clear-active`, TUI `x` key to clear, empty-state copy in Dash/Session. + ### Phase 2 — Tool registry + agent loop -- [ ] Not started +- [x] **Completed 2026-04-18** — commit `443a78c`. 16 ARC tools wired (11 read, 4 write, 1 dangerous), 3 permission modes, runAgent generator, 43 tests. Tool_result round-trip to LLM deferred to Phase 4 persistent sessions (noted in `loop.ts`). ### Phase 3 — Knowledge endowment -- [ ] Not started +- [x] **Completed 2026-04-18** — commit `443a78c`. `ARC_KNOWLEDGE` (architecture + 52-entry command catalog + 16-term glossary), `FEATURES_INDEX` (33 entries), `buildSystemPrompt()` composes 6 sections under 4K tokens (~1284 typical). 27 tests. ### Phase 4 — CLI `arc chat` - [ ] Not started From a14bedc54dd6b05c69db597bffb45d767730f469 Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sat, 18 Apr 2026 10:14:40 -0400 Subject: [PATCH 09/17] feat: arc chat CLI + roundtable orchestrator (Wave 1: Phases 4 + 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4 — arc chat interactive REPL - packages/core/src/chat/: ChatSession with append/serialize/load, per-profile session store at ~/.arc/profiles/<name>/chat-sessions/, list/load/delete/save helpers with atomic writes - packages/cli/src/commands/chat.ts: arc chat with --profile, --mode, --once, --no-tools, --session, --new flags. 
REPL supports /exit, /save, /new, /mode, /clear, /sessions, /resume, /help - constructPromptFromSession collapses full transcript per turn (v1 O(n^2) limit documented; soft-truncates at 15k tokens via estimateTokens from context-manager) - Supervised mode blocks writes with [y/N] readline confirm; --once auto-denies writes to avoid non-TTY hangs - 22 unit + integration tests (mocked agent client with scripted chunk stream + real ToolRegistry/runAgent wiring) Phase 5 — Roundtable orchestrator - packages/core/src/orchestration/: - delivery-policy.ts: AgentDeliveryPolicy with per-model profiles (Gemini 18s/1.6x, Claude 12s/1.45x, Codex 8s/1.2x), EMA latency tracker, MessagePriorityQueue for coalescing - staged-workflow.ts: PLAN/EXEC/VERIFY state machine, cursor-based StagedMessageBus + InMemoryMessageBus, DEFAULT_COMPLETION_PATTERNS - watchdog.ts: pure tick() nudge-at-3min / stall-at-5min protocol, injected deps for testability - roundtable.ts: RoundtableOrchestrator drives the existing hook (dedicated HookBus + HookStateStore per run); forces launchMode worker; adaptive pacing between turns; synthesizer JSON parsing with graceful fallback; virtual agents throw (Phase 5.1) - All three Agent-Forge ports attributed via top-of-file comments - 59 new unit tests (delivery / staged / watchdog / roundtable) + 10 first tests for the roundtable hook itself (prior coverage: 0) Build 554 KB; 1231/1232 tests pass (same tui-interactive flake that passes in isolation). Typecheck clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/cli/src/cli.ts | 50 ++ packages/cli/src/commands/chat.ts | 633 ++++++++++++++++++ packages/core/src/chat/index.ts | 22 + packages/core/src/chat/session.ts | 184 +++++ packages/core/src/chat/store.ts | 110 +++ packages/core/src/index.ts | 2 + .../core/src/orchestration/delivery-policy.ts | 220 ++++++ packages/core/src/orchestration/index.ts | 62 ++ packages/core/src/orchestration/roundtable.ts | 559 ++++++++++++++++ .../core/src/orchestration/staged-workflow.ts | 265 ++++++++ packages/core/src/orchestration/watchdog.ts | 222 ++++++ tests/integration/chat-cli.test.ts | 176 +++++ tests/unit/chat/session.test.ts | 127 ++++ tests/unit/chat/store.test.ts | 122 ++++ tests/unit/hooks/roundtable-hook.test.ts | 247 +++++++ .../orchestration/delivery-policy.test.ts | 143 ++++ tests/unit/orchestration/roundtable.test.ts | 311 +++++++++ .../orchestration/staged-workflow.test.ts | 110 +++ tests/unit/orchestration/watchdog.test.ts | 151 +++++ 19 files changed, 3716 insertions(+) create mode 100644 packages/cli/src/commands/chat.ts create mode 100644 packages/core/src/chat/index.ts create mode 100644 packages/core/src/chat/session.ts create mode 100644 packages/core/src/chat/store.ts create mode 100644 packages/core/src/orchestration/delivery-policy.ts create mode 100644 packages/core/src/orchestration/index.ts create mode 100644 packages/core/src/orchestration/roundtable.ts create mode 100644 packages/core/src/orchestration/staged-workflow.ts create mode 100644 packages/core/src/orchestration/watchdog.ts create mode 100644 tests/integration/chat-cli.test.ts create mode 100644 tests/unit/chat/session.test.ts create mode 100644 tests/unit/chat/store.test.ts create mode 100644 tests/unit/hooks/roundtable-hook.test.ts create mode 100644 tests/unit/orchestration/delivery-policy.test.ts create mode 100644 tests/unit/orchestration/roundtable.test.ts create mode 100644 tests/unit/orchestration/staged-workflow.test.ts create mode 
// `arc chat` — interactive REPL / one-shot chat against a profile's agent.
// Value-taking options need a `<placeholder>`; without one commander parses
// the flag as a boolean, which contradicts the string-typed `opts` below.
program
  .command("chat")
  .description("Interactive chat with your active profile's agent (with ARC tool use)")
  .option("--profile <name>", "Profile to use (default: active)")
  .option("--mode <mode>", "Permission mode (read-only|supervised|autonomous)", "supervised")
  .option("--once <prompt>", "One-shot mode: send prompt, stream response, exit")
  .option("--no-tools", "Disable ARC tool use (plain chat only)")
  .option("--session <id>", "Resume a previous chat session")
  .option("--new", "Force a new session (default when --session is absent)")
  .addHelpText(
    "after",
    `
Examples:
  $ arc chat                             (interactive REPL)
  $ arc chat --once "list my profiles"   (one-shot)
  $ arc chat --mode read-only            (no writes)
  $ arc chat --session abc-123           (resume)
  $ arc chat --no-tools                  (plain chat only)

REPL commands:
  /exit /quit     End the session
  /save           Save the session to disk
  /new            Start a new session
  /mode <mode>    Switch permission mode
  /sessions       List saved sessions
  /resume <id>    Resume a saved session
  /help           Show command list
`,
  )
  .action(
    async (opts: {
      profile?: string;
      mode?: string;
      once?: string;
      tools?: boolean;
      session?: string;
      new?: boolean;
    }) => {
      // Lazy-load the command module only when `arc chat` actually runs.
      const mod = await import("./commands/chat.js");
      await mod.handleChat({
        profile: opts.profile,
        // NOTE(review): unchecked cast of raw user input — presumably
        // handleChat validates the value; confirm.
        mode: opts.mode as "read-only" | "supervised" | "autonomous" | undefined,
        once: opts.once,
        // commander exposes `--no-tools` as `tools: false`.
        noTools: opts.tools === false,
        session: opts.session,
        new: opts.new,
      });
    },
  );
/** Options accepted by the `arc chat` command handler. All fields are optional. */
export interface ChatOptions {
  /** Name of the profile to chat with (`--profile`). */
  profile?: string;
  /** Permission mode for tool use (`--mode`). */
  mode?: PermissionMode;
  /** Prompt text for one-shot mode (`--once`). */
  once?: string;
  /** When true, ARC tool use is disabled (`--no-tools`). */
  noTools?: boolean;
  /** Identifier of a saved session to resume (`--session`). */
  session?: string;
  /** Force a new session (`--new`). */
  new?: boolean;
}
/**
 * Print a one-line summary of a tool's result (or error) to stdout.
 *
 * @param tool Name of the tool that produced the result.
 * @param result Raw result value; strings print verbatim, anything else is
 *   JSON-encoded where possible.
 * @param ok True for a successful result, false to label the line an error.
 */
function printToolResult(tool: string, result: unknown, ok: boolean): void {
  const label = ok ? pc.gray("← result") : pc.red("← error");
  let body: string;
  try {
    // JSON.stringify returns `undefined` (it does not throw) for undefined,
    // functions and symbols. Fall back to String() so `body` is always a
    // string and the length check below cannot throw.
    body =
      typeof result === "string"
        ? result
        : (JSON.stringify(result) ?? String(result));
  } catch {
    // Circular structures and BigInt make JSON.stringify throw.
    body = String(result);
  }
  // Keep the terminal readable: cap the preview at 400 characters.
  if (body.length > 400) body = body.slice(0, 400) + "...";
  writeLine(label + ` ${pc.gray(tool)} ` + pc.dim(body));
}
+ const kept: ChatMessage[] = []; + let runningChars = 0; + const softLimitChars = MAX_PROMPT_TOKENS * 4; + for (let i = convMsgs.length - 1; i >= 0; i--) { + const m = convMsgs[i]; + runningChars += m.content.length + 64; + if (runningChars > softLimitChars && kept.length > 0) break; + kept.unshift(m); + } + if (kept.length < convMsgs.length) { + kept.unshift({ + role: "system", + content: `(${convMsgs.length - kept.length} earlier messages omitted to fit context)`, + timestamp: new Date().toISOString(), + }); + } + + for (const m of systemMsgs) { + parts.push(`System: ${m.content}`); + } + for (const m of kept) { + if (m.role === "user") { + parts.push(`User: ${m.content}`); + } else if (m.role === "assistant") { + let line = `Assistant: ${m.content}`; + if (m.toolCalls && m.toolCalls.length > 0) { + for (const tc of m.toolCalls) { + const resultBlob = + tc.error !== undefined + ? `error=${tc.error}` + : tc.result !== undefined + ? `result=${JSON.stringify(tc.result).slice(0, 300)}` + : "(no result)"; + line += `\n (tool_call:${tc.name} ${JSON.stringify(tc.input).slice(0, 200)}) → ${resultBlob}`; + } + } + parts.push(line); + } else if (m.role === "tool") { + parts.push(`tool_result: ${m.content}`); + } else { + parts.push(`System: ${m.content}`); + } + } + + parts.push(""); + parts.push(""); + parts.push("Respond to the last user message."); + const joined = parts.join("\n"); + // Final safety clamp — if we're still over budget, hard-truncate. 
/**
 * Validate a user-supplied permission mode.
 *
 * @param val Raw value from `--mode` or the `/mode` command.
 * @returns The value itself when it is one of the three known modes, or
 *          "supervised" when no value was given.
 * @throws Error when `val` is any other string (including the empty string).
 */
function parseMode(val: string | undefined): PermissionMode {
  switch (val) {
    case undefined:
      // No explicit choice: default to supervised.
      return "supervised";
    case "read-only":
    case "supervised":
    case "autonomous":
      return val;
    default:
      throw new Error(
        `Unknown --mode "${val}". Expected one of: read-only, supervised, autonomous.`,
      );
  }
}
/**
 * Human-readable tool categories passed to `buildSystemPrompt`.
 *
 * @param noTools When true, no categories are advertised.
 * @returns A fresh array on every call: empty when tools are disabled,
 *          otherwise the five built-in category descriptions.
 */
function buildToolCategories(noTools: boolean): string[] {
  return noTools
    ? []
    : [
        "profiles (list, show, clone, switch, export)",
        "state (active profile, recent launches, doctor)",
        "logs + memory + tasks (read + search)",
        "skills + remote agents (read)",
        "shared layer (read)",
      ];
}
+ const prompt = constructPromptFromSession(session); + + let assistantText = ""; + const toolCalls: ToolCallRecord[] = []; + const pendingById = new Map(); + + // Wrap the client.send invocation to surface the system prompt via + // instructions — the one-shot adapters prepend it to the prompt. + const wrappedClient: AgentClient = { + send(p) { + return client.send(p, { instructions: systemPrompt }); + }, + shutdown() { + return client.shutdown(); + }, + }; + + let events: AsyncIterable; + try { + events = runAgent({ client: wrappedClient, registry, ctx }, prompt); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + printError(`Agent failed: ${msg}`); + return; + } + + try { + for await (const ev of events) { + switch (ev.type) { + case "text": + assistantText += ev.content; + printAssistantText(ev.content); + break; + case "thinking": + printThinking(ev.content); + break; + case "tool_call": { + printToolCall(ev.tool, ev.input); + const rec: ToolCallRecord = { + id: ev.id, + name: ev.tool, + input: ev.input, + }; + pendingById.set(ev.id, rec); + toolCalls.push(rec); + break; + } + case "tool_result": { + const rec = pendingById.get(ev.id); + const ok = ev.result.ok; + if (ok) { + if (rec) rec.result = ev.result.output; + printToolResult(ev.tool, ev.result.output, true); + } else { + if (rec) { + rec.error = ev.result.error; + if (ev.result.blocked) rec.confirmed = false; + } + printToolResult(ev.tool, ev.result.error, false); + } + break; + } + case "error": + printError(ev.message); + break; + case "done": + if (ev.reason === "max_turns") { + writeLine(pc.yellow("\n(max tool-turns reached)")); + } + break; + } + } + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + printError(`Stream error: ${msg}`); + } + + // Ensure we terminate the line. 
+ if (assistantText && !assistantText.endsWith("\n")) writeText("\n"); + + session.append({ + role: "assistant", + content: assistantText, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + }); + + try { + saveSession(session); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + printError(`Failed to save session: ${msg}`); + } +} + +// --------------------------------------------------------------------------- +// Entrypoint +// --------------------------------------------------------------------------- + +export async function handleChat(opts: ChatOptions): Promise { + const mode = parseMode(opts.mode); + const resolved = resolveProfileForChat(opts.profile); + if (!resolved) { + process.exit(1); + } + const { profile, name: profileName } = resolved; + + // Agent client. + let client: AgentClient; + try { + client = getAgentClientForProfile(profile); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + printError(msg); + process.exit(1); + return; + } + + // Tool registry. + const registry = new ToolRegistry(); + if (!opts.noTools) { + registerArcTools(registry); + } + + // System prompt. + const config = loadConfig(); + const recentLaunches = getRecentLaunches(3).map((l) => ({ + profile: l.profile, + tool: l.tool, + startedAt: l.timestamp, + exitCode: l.exitCode, + })); + const systemPrompt = buildSystemPrompt({ + config, + recentLaunches, + activeProfile: profile, + arcVersion: getVersion(), + doctorIssues: [], + permissionMode: mode, + toolCategories: buildToolCategories(opts.noTools ?? false), + }); + + // ── Resume or create session ────────────────────────── + let session: ChatSession; + if (opts.session) { + try { + session = loadSession(profileName, opts.session); + printSystem(`Resuming session ${session.id} (${session.messages.length} msgs).`); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + printError(msg); + process.exit(1); + return; + } + } else { + session = new ChatSession({ profileName, permissionMode: mode }); + // Save baseline system context as a system message (useful for resume). + session.append({ + role: "system", + content: `ARC chat session — profile=${profileName}, mode=${mode}, tool=${profile.tool}`, + }); + } + + // ── One-shot mode ───────────────────────────────────── + if (opts.once !== undefined) { + const ctx: ToolContext = { + mode, + confirm: makeNonInteractiveConfirm(), + log: () => {}, + profile, + }; + await executeTurn({ + client, + registry, + ctx, + session, + systemPrompt, + userMessage: opts.once, + }); + try { + await client.shutdown(); + } catch { + /* ignore */ + } + return; + } + + // ── Interactive REPL ────────────────────────────────── + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + terminal: true, + }); + const confirm = makeInteractiveConfirm(rl); + + printSystem( + `ARC chat — profile=${profileName} tool=${profile.tool} mode=${mode}. Type /help for commands, /exit to quit.`, + ); + + let currentMode: PermissionMode = mode; + let currentSession = session; + let currentSystemPrompt = systemPrompt; + + const promptUser = (): Promise => { + return new Promise((resolve) => { + rl.question(pc.bold(pc.cyan("\nyou > ")), (line) => resolve(line)); + }); + }; + + const rebuildSystemPromptForMode = (m: PermissionMode): string => { + return buildSystemPrompt({ + config: loadConfig(), + recentLaunches, + activeProfile: profile, + arcVersion: getVersion(), + doctorIssues: [], + permissionMode: m, + toolCategories: buildToolCategories(opts.noTools ?? 
false), + }); + }; + + const printHelp = (): void => { + writeLine(""); + writeLine(pc.bold("Commands:")); + writeLine(" /exit, /quit End the session"); + writeLine(" /save Save the session to disk"); + writeLine(" /new Start a new session (forgets current)"); + writeLine(" /mode Switch permission mode (read-only|supervised|autonomous)"); + writeLine(" /clear Clear the in-memory transcript (keeps id)"); + writeLine(" /sessions List saved sessions for this profile"); + writeLine(" /resume Resume a saved session"); + writeLine(" /help Show this help"); + writeLine(""); + }; + + try { + // eslint-disable-next-line no-constant-condition + while (true) { + const input = (await promptUser()).trim(); + if (!input) continue; + + if (input.startsWith("/")) { + const [cmd, ...rest] = input.slice(1).split(/\s+/); + const arg = rest.join(" ").trim(); + + if (cmd === "exit" || cmd === "quit") { + break; + } + if (cmd === "help") { + printHelp(); + continue; + } + if (cmd === "save") { + try { + saveSession(currentSession); + printSystem(`Saved session ${currentSession.id}.`); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + printError(msg); + } + continue; + } + if (cmd === "new") { + currentSession = new ChatSession({ + profileName, + permissionMode: currentMode, + }); + currentSession.append({ + role: "system", + content: `ARC chat session — profile=${profileName}, mode=${currentMode}, tool=${profile.tool}`, + }); + printSystem(`Started new session ${currentSession.id}.`); + continue; + } + if (cmd === "mode") { + try { + const newMode = parseMode(arg); + currentMode = newMode; + currentSession.permissionMode = newMode; + currentSystemPrompt = rebuildSystemPromptForMode(newMode); + printSystem(`Permission mode → ${newMode}.`); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + printError(msg); + } + continue; + } + if (cmd === "clear") { + currentSession.messages = []; + currentSession.append({ + role: "system", + content: `ARC chat session — profile=${profileName}, mode=${currentMode}, tool=${profile.tool}`, + }); + printSystem("Transcript cleared."); + continue; + } + if (cmd === "sessions") { + const summaries = listSessions(profileName); + if (summaries.length === 0) { + printSystem("No saved sessions."); + } else { + writeLine(""); + for (const s of summaries.slice(0, 20)) { + const marker = s.id === currentSession.id ? pc.green("*") : " "; + writeLine( + ` ${marker} ${pc.cyan(s.id)} ${pc.dim(s.updatedAt)} ${pc.dim(`(${s.messageCount} msgs)`)} ${s.summary}`, + ); + } + writeLine(""); + } + continue; + } + if (cmd === "resume") { + if (!arg) { + printError("/resume — pass a session id from /sessions."); + continue; + } + try { + currentSession = loadSession(profileName, arg); + currentMode = currentSession.permissionMode; + currentSystemPrompt = rebuildSystemPromptForMode(currentMode); + printSystem( + `Resumed ${currentSession.id} (${currentSession.messages.length} msgs, mode=${currentMode}).`, + ); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + printError(msg); + } + continue; + } + printError(`Unknown command: /${cmd}. Try /help.`); + continue; + } + + const ctx: ToolContext = { + mode: currentMode, + confirm, + log: () => {}, + profile, + }; + await executeTurn({ + client, + registry, + ctx, + session: currentSession, + systemPrompt: currentSystemPrompt, + userMessage: input, + }); + } + } finally { + rl.close(); + try { + await client.shutdown(); + } catch { + /* ignore */ + } + } + + printSystem("Goodbye."); +} diff --git a/packages/core/src/chat/index.ts b/packages/core/src/chat/index.ts new file mode 100644 index 0000000..1df02fc --- /dev/null +++ b/packages/core/src/chat/index.ts @@ -0,0 +1,22 @@ +/** + * Chat session public surface — Phase 4. 
+ * + * See docs/plans/ai-and-roundtable.md — Phase 4 for the overall design. + */ + +export { + ChatSession, + type ChatMessage, + type ToolCallRecord, + type ChatSessionJson, + type ChatSessionSummary, +} from "./session.js"; + +export { + saveSession, + loadSession, + listSessions, + deleteSession, + getChatSessionsDir, + getChatSessionPath, +} from "./store.js"; diff --git a/packages/core/src/chat/session.ts b/packages/core/src/chat/session.ts new file mode 100644 index 0000000..b0e6521 --- /dev/null +++ b/packages/core/src/chat/session.ts @@ -0,0 +1,184 @@ +/** + * Chat session primitive — a replayable transcript of a single `arc chat` + * conversation. Serialized to JSON on disk for resume support. + * + * See docs/plans/ai-and-roundtable.md — Phase 4. + */ + +import crypto from "node:crypto"; +import type { PermissionMode } from "../agent/types.js"; + +/** A recorded tool invocation within an assistant turn. */ +export interface ToolCallRecord { + id: string; + name: string; + input: unknown; + result?: unknown; + error?: string; + confirmed?: boolean; +} + +/** A single message in the chat transcript. */ +export interface ChatMessage { + role: "user" | "assistant" | "system" | "tool"; + content: string; + toolCalls?: ToolCallRecord[]; + /** Present on `role: "tool"` messages — points back at the originating tool_call id. */ + toolCallId?: string; + timestamp: string; +} + +/** Summary payload for session listings. */ +export interface ChatSessionSummary { + id: string; + summary: string; + profileName: string; + updatedAt: string; + createdAt: string; + messageCount: number; +} + +/** On-disk wire format. */ +export interface ChatSessionJson { + id: string; + profileName: string; + permissionMode: PermissionMode; + messages: ChatMessage[]; + createdAt: string; + updatedAt: string; +} + +function nowIso(): string { + return new Date().toISOString(); +} + +/** + * In-memory chat session. Holds an ordered list of messages and a permission + * mode. 
/**
 * In-memory chat session: an ordered message transcript plus the permission
 * mode it runs under. `serialize()` produces the on-disk JSON shape and
 * `ChatSession.load()` rebuilds a session from parsed JSON.
 */
export class ChatSession {
  readonly id: string;
  readonly profileName: string;
  readonly createdAt: string;
  permissionMode: PermissionMode;
  messages: ChatMessage[];
  updatedAt: string;

  /**
   * @param init Initial state. `id` defaults to a random UUID, `messages` to
   *   an empty list (a supplied array is shallow-copied), `createdAt` to the
   *   current time and `updatedAt` to `createdAt`.
   */
  constructor(init: {
    id?: string;
    profileName: string;
    permissionMode: PermissionMode;
    messages?: ChatMessage[];
    createdAt?: string;
    updatedAt?: string;
  }) {
    this.id = init.id ?? crypto.randomUUID();
    this.profileName = init.profileName;
    this.permissionMode = init.permissionMode;
    this.messages = init.messages ? [...init.messages] : [];
    this.createdAt = init.createdAt ?? nowIso();
    this.updatedAt = init.updatedAt ?? this.createdAt;
  }

  /**
   * Append a message and set `updatedAt` to its timestamp.
   *
   * @param msg Message to store; `timestamp` defaults to the current time.
   * @returns The stored message (with its timestamp filled in).
   */
  append(msg: Omit<ChatMessage, "timestamp"> & { timestamp?: string }): ChatMessage {
    const stored: ChatMessage = {
      ...msg,
      timestamp: msg.timestamp ?? nowIso(),
    };
    this.messages.push(stored);
    this.updatedAt = stored.timestamp;
    return stored;
  }

  /**
   * Short one-line summary: the first user message with whitespace runs
   * collapsed, cut to 57 characters plus "..." when longer than 60.
   * Returns "(empty session)" when there is no user message.
   */
  summary(): string {
    const firstUser = this.messages.find((m) => m.role === "user");
    if (!firstUser) return "(empty session)";
    const oneLine = firstUser.content.replace(/\s+/g, " ").trim();
    if (oneLine.length <= 60) return oneLine;
    return oneLine.slice(0, 57) + "...";
  }

  /** Produce the on-disk representation (the `messages` array is shared, not copied). */
  serialize(): ChatSessionJson {
    return {
      id: this.id,
      profileName: this.profileName,
      permissionMode: this.permissionMode,
      messages: this.messages,
      createdAt: this.createdAt,
      updatedAt: this.updatedAt,
    };
  }

  /**
   * Rebuild a `ChatSession` from parsed JSON.
   *
   * @param json Value obtained from `JSON.parse`; validated field by field.
   * @returns A new session. Missing `createdAt` / `updatedAt` default to now.
   * @throws Error on structural problems: non-object input, missing or empty
   *   `id`, non-string `profileName`, unknown `permissionMode`, non-array
   *   `messages`, or a message with an invalid role / content / timestamp or
   *   a `toolCalls` entry that is not an object.
   */
  static load(json: unknown): ChatSession {
    if (typeof json !== "object" || json === null) {
      throw new Error("Invalid ChatSession JSON: not an object");
    }
    const obj = json as Record<string, unknown>;

    const id = obj["id"];
    if (typeof id !== "string" || id.length === 0) {
      throw new Error("Invalid ChatSession JSON: missing id");
    }
    const profileName = obj["profileName"];
    if (typeof profileName !== "string") {
      throw new Error("Invalid ChatSession JSON: missing profileName");
    }
    const mode = obj["permissionMode"];
    if (
      mode !== "read-only" &&
      mode !== "supervised" &&
      mode !== "autonomous"
    ) {
      throw new Error(`Invalid ChatSession JSON: bad permissionMode "${String(mode)}"`);
    }
    const rawMessages = obj["messages"];
    if (!Array.isArray(rawMessages)) {
      throw new Error("Invalid ChatSession JSON: messages must be an array");
    }

    const messages: ChatMessage[] = rawMessages.map((m: unknown, i: number) => {
      if (typeof m !== "object" || m === null) {
        throw new Error(`Invalid ChatSession JSON: message[${i}] not an object`);
      }
      const mm = m as Record<string, unknown>;
      const role = mm["role"];
      if (
        role !== "user" &&
        role !== "assistant" &&
        role !== "system" &&
        role !== "tool"
      ) {
        throw new Error(`Invalid ChatSession JSON: message[${i}].role is invalid`);
      }
      const content = mm["content"];
      if (typeof content !== "string") {
        throw new Error(`Invalid ChatSession JSON: message[${i}].content must be a string`);
      }
      const timestamp = mm["timestamp"];
      if (typeof timestamp !== "string") {
        throw new Error(`Invalid ChatSession JSON: message[${i}].timestamp must be a string`);
      }
      const out: ChatMessage = { role, content, timestamp };

      const rawToolCalls = mm["toolCalls"];
      if (Array.isArray(rawToolCalls)) {
        // Consumers read properties off every record, so a null or primitive
        // entry must be rejected here rather than crash later.
        rawToolCalls.forEach((tc: unknown, j: number) => {
          if (typeof tc !== "object" || tc === null) {
            throw new Error(
              `Invalid ChatSession JSON: message[${i}].toolCalls[${j}] not an object`,
            );
          }
        });
        out.toolCalls = rawToolCalls as ToolCallRecord[];
      }
      const toolCallId = mm["toolCallId"];
      if (typeof toolCallId === "string") {
        out.toolCallId = toolCallId;
      }
      return out;
    });

    const createdAt = obj["createdAt"];
    const updatedAt = obj["updatedAt"];
    return new ChatSession({
      id,
      profileName,
      permissionMode: mode,
      messages,
      createdAt: typeof createdAt === "string" ? createdAt : nowIso(),
      updatedAt: typeof updatedAt === "string" ? updatedAt : nowIso(),
    });
  }
}
/**
 * Summarize every saved session of a profile, newest `updatedAt` first.
 *
 * Only `*.json` entries in the profile's chat-sessions directory are
 * considered. Files that cannot be read, parsed, or validated by
 * `ChatSession.load` are skipped so one bad file does not break the listing.
 *
 * @param profileName Profile whose sessions directory is scanned.
 * @returns Summaries without message bodies; empty when the directory does
 *          not exist.
 */
export function listSessions(profileName: string): ChatSessionSummary[] {
  const sessionsDir = getChatSessionsDir(profileName);
  if (!fs.existsSync(sessionsDir)) {
    return [];
  }

  const summaries: ChatSessionSummary[] = [];
  const jsonFiles = fs
    .readdirSync(sessionsDir)
    .filter((fileName) => fileName.endsWith(".json"));

  for (const fileName of jsonFiles) {
    try {
      const text = fs.readFileSync(path.join(sessionsDir, fileName), "utf-8");
      const loaded = ChatSession.load(JSON.parse(text) as ChatSessionJson);
      summaries.push({
        id: loaded.id,
        summary: loaded.summary(),
        profileName: loaded.profileName,
        updatedAt: loaded.updatedAt,
        createdAt: loaded.createdAt,
        messageCount: loaded.messages.length,
      });
    } catch {
      // Unreadable or malformed file: leave it out of the listing.
    }
  }

  // Descending by updatedAt using plain string comparison.
  summaries.sort((left, right) => {
    if (left.updatedAt < right.updatedAt) return 1;
    if (left.updatedAt > right.updatedAt) return -1;
    return 0;
  });
  return summaries;
}
/**
 * Remove a saved session file.
 *
 * @param profileName Profile that owns the session.
 * @param id          Session id; validated by `getChatSessionPath`.
 *
 * Does nothing when the file is not present.
 */
export function deleteSession(profileName: string, id: string): void {
  const sessionFile = getChatSessionPath(profileName, id);
  if (!fs.existsSync(sessionFile)) {
    return;
  }
  fs.rmSync(sessionFile, { force: true });
}
+ */ + +import type { AgentTool } from "../types.js"; + +// ─── Constants ─────────────────────────────────────────────────────── + +const DEFAULT_MIN_REPLY_GRACE_MS = 10_000; +const DEFAULT_MAX_REPLY_GRACE_MS = 90_000; +const DEFAULT_ADAPTIVE_REPLY_MULTIPLIER = 1.3; +const DEFAULT_BROADCAST_COALESCE_WINDOW_MS = 12_000; + +// EMA weights — 70% weight on previous observation, 30% on new sample. +const EMA_PREVIOUS_WEIGHT = 0.7; +const EMA_SAMPLE_WEIGHT = 0.3; + +// ─── Types ─────────────────────────────────────────────────────────── + +/** + * Delivery policy governing how aggressively the orchestrator paces turns + * between agents. All durations are in milliseconds. + */ +export interface AgentDeliveryPolicy { + /** Lower bound on between-turn grace. Fast agents still wait this long. */ + minReplyGraceMs: number; + /** Upper bound on between-turn grace. Slow agents never block longer. */ + maxReplyGraceMs: number; + /** Multiplier applied to the rolling EMA latency when computing grace. */ + adaptiveReplyMultiplier: number; + /** Window within which adjacent broadcasts are candidates to coalesce. */ + broadcastCoalesceWindowMs: number; + /** When false, pacing is disabled entirely (grace = 0). */ + respectAgentPace: boolean; + /** When true, direct (non-broadcast) messages bypass pacing. */ + directMessageBypass: boolean; +} + +/** EMA-tracked latency state, per agent. */ +export interface DeliveryState { + /** Rolling latency average — milliseconds. 0 means no samples yet. */ + observedLatencyMs: number; + /** Number of samples folded into the EMA. */ + sampleCount: number; +} + +export type MessagePriority = "urgent" | "normal" | "low"; + +/** Simple priority-queue entry for coalescing pending messages. 
/**
 * Restrict `value` to the range [`min`, `max`].
 *
 * The lower bound is applied first and the upper bound second, so when
 * `min > max` the result is `max`.
 */
function clamp(value: number, min: number, max: number): number {
  const raisedToMin = Math.max(min, value);
  return Math.min(max, raisedToMin);
}
/**
 * Return the latency state that results from observing one more reply.
 *
 * - A non-finite or non-positive sample leaves the state untouched (the same
 *   object is returned).
 * - With no usable history, the rounded sample becomes the average and the
 *   sample count is set to 1.
 * - Otherwise the previous average and the sample are mixed using
 *   `EMA_PREVIOUS_WEIGHT` and `EMA_SAMPLE_WEIGHT`, rounded to whole
 *   milliseconds, and the sample count is incremented.
 *
 * The input state is never mutated.
 */
export function updateReplyLatencyAverage(
  state: DeliveryState,
  newLatencyMs: number,
): DeliveryState {
  const sampleIsUsable = Number.isFinite(newLatencyMs) && newLatencyMs > 0;
  if (!sampleIsUsable) {
    return state;
  }

  const needsSeed = state.sampleCount <= 0 || state.observedLatencyMs <= 0;
  if (needsSeed) {
    return { observedLatencyMs: Math.round(newLatencyMs), sampleCount: 1 };
  }

  const previousPart = state.observedLatencyMs * EMA_PREVIOUS_WEIGHT;
  const samplePart = newLatencyMs * EMA_SAMPLE_WEIGHT;
  return {
    observedLatencyMs: Math.round(previousPart + samplePart),
    sampleCount: state.sampleCount + 1,
  };
}
/**
 * Small priority queue used when coalescing pending messages.
 *
 * Items are kept ordered by priority rank (looked up in `PRIORITY_ORDER`,
 * lower rank first) and then by ascending `createdAt`; the queue is
 * re-sorted on every insert.
 *
 * Meant to be driven from one orchestrator loop; no locking is performed.
 */
export class MessagePriorityQueue {
  private readonly items: PriorityQueueItem[] = [];

  /** Add an item and restore queue order. */
  enqueue(item: PriorityQueueItem): void {
    const byRankThenAge = (left: PriorityQueueItem, right: PriorityQueueItem): number => {
      const rankDelta = PRIORITY_ORDER[left.priority] - PRIORITY_ORDER[right.priority];
      return rankDelta !== 0 ? rankDelta : left.createdAt - right.createdAt;
    };
    this.items.push(item);
    this.items.sort(byRankThenAge);
  }

  /** Remove and return the head item, or `undefined` when empty. */
  dequeue(): PriorityQueueItem | undefined {
    return this.items.shift();
  }

  /** Return the head item without removing it, or `undefined` when empty. */
  peek(): PriorityQueueItem | undefined {
    return this.items[0];
  }

  /** Number of pending items. */
  get size(): number {
    return this.items.length;
  }

  /** Drop every pending item. */
  clear(): void {
    this.items.length = 0;
  }

  /** Shallow copy of the pending items in queue order. */
  snapshot(): PriorityQueueItem[] {
    return this.items.slice();
  }
}
+ * + * Public exports: + * - Adaptive delivery policy + EMA latency tracking (`delivery-policy.ts`) + * - Staged PLAN/EXEC/VERIFY workflow (`staged-workflow.ts`) + * - Stall watchdog (`watchdog.ts`) + * - Roundtable orchestrator (`roundtable.ts`) + */ + +// Delivery policy +export { + computeAdaptiveGraceMs, + createDeliveryState, + resolveDeliveryPolicyForTool, + updateReplyLatencyAverage, + MessagePriorityQueue, + type AgentDeliveryPolicy, + type DeliveryState, + type MessagePriority, + type PriorityQueueItem, +} from "./delivery-policy.js"; + +// Staged workflow +export { + DEFAULT_COMPLETION_PATTERNS, + InMemoryMessageBus, + StagedWorkflowManager, + type MessageBusReadResult, + type StagedMessage, + type StagedMessageBus, + type StagedPhase, + type StagedWorkflowConfig, + type StagedWorkflowManagerDeps, + type StagedWorkflowResult, + type StagedWorkflowTerminal, +} from "./staged-workflow.js"; + +// Watchdog +export { + AgentWatchdog, + isLowSignalWatchdogReply, + WATCHDOG_NUDGE_AFTER_MS, + WATCHDOG_NUDGE_TEXT, + WATCHDOG_STALL_AFTER_MS, + WATCHDOG_STALL_OPTIONS, + type AgentWatchdogDeps, + type WatchdogEvent, + type WatchdogEventType, +} from "./watchdog.js"; + +// Roundtable +export { + RoundtableOrchestrator, + type RoundtableAgent, + type RoundtableEvent, + type RoundtableMessage, + type RoundtableOrchestratorOptions, + type RoundtableResult, + type RoundtableRole, + type RoundtableRunOptions, +} from "./roundtable.js"; diff --git a/packages/core/src/orchestration/roundtable.ts b/packages/core/src/orchestration/roundtable.ts new file mode 100644 index 0000000..73a5864 --- /dev/null +++ b/packages/core/src/orchestration/roundtable.ts @@ -0,0 +1,559 @@ +/** + * Roundtable orchestrator — drives the existing `roundtable` hook from + * outside. The hook is reactive (it reacts to messages entering the + * pipeline). 
This orchestrator is proactive: it iterates turns, calls each + * agent's AgentClient, feeds the response back through HookBus.runPost() so + * the hook's state machine advances normally. + * + * Design notes: + * - Virtual agents (multiple roles sharing one profile) are deferred to + * Phase 5.1. The type surface accepts `virtualRole`, but the runner + * throws if one is supplied without a dedicated profile. + * - `launchMode` is forced to "worker" for every agent profile: the + * orchestrator requires captured stdout streams, not TTY handoff. + * - Synthesizer JSON parsing is tolerant: if the LLM returns prose, we + * return `consensus = 0.5` and `summary = raw text`. + */ + +import { writeLogEvent } from "../logging.js"; +import type { Profile } from "../types.js"; +import { HookBus } from "../hooks/hook-bus.js"; +import { HookStateStore } from "../hooks/hook-state.js"; +import { + createRoundtableHook, + type RoundtableState, +} from "../hooks/roundtable.js"; +import type { + AgentResponse, + HookContext, +} from "../hooks/types.js"; +import type { + AgentChunk, + AgentClient, + AgentSendOptions, +} from "../agent-client/types.js"; +import { getAgentClientForProfile } from "../agent-client/dispatch.js"; +import { + computeAdaptiveGraceMs, + createDeliveryState, + resolveDeliveryPolicyForTool, + updateReplyLatencyAverage, + type AgentDeliveryPolicy, + type DeliveryState, +} from "./delivery-policy.js"; + +// ─── Public types ──────────────────────────────────────────────────── + +export type RoundtableRole = + | "advocate" + | "critic" + | "neutral" + | "synthesizer"; + +/** + * One participant in a roundtable. + * + * v1 requires `profile`. `virtualRole` is reserved for Phase 5.1, where + * multiple logical roles can share a single profile. 
+ */ +export interface RoundtableAgent { + profile?: Profile; + virtualRole?: string; + role: RoundtableRole; + displayName?: string; +} + +export interface RoundtableRunOptions { + topic: string; + agents: RoundtableAgent[]; + /** Number of discussion rounds. Defaults to 2. */ + rounds?: number; + /** Agent that writes the final summary. Defaults to first entry in `agents`. */ + synthesizer?: RoundtableAgent; + /** Session id to scope hook state. Auto-generated if omitted. */ + sessionId?: string; + /** Abort signal — propagated into each agent call. */ + signal?: AbortSignal; + /** Live progress observer. */ + onEvent?: (evt: RoundtableEvent) => void; +} + +export interface RoundtableMessage { + agent: string; + role: RoundtableRole; + round: number; + content: string; + createdAt: number; + latencyMs: number; +} + +export type RoundtableEvent = + | { + type: "turn-start"; + agent: string; + role: RoundtableRole; + round: number; + turnIndex: number; + } + | { type: "turn-chunk"; agent: string; chunk: AgentChunk } + | { + type: "turn-complete"; + agent: string; + round: number; + latencyMs: number; + content: string; + } + | { + type: "phase-change"; + status: RoundtableState["status"]; + } + | { type: "synthesis-start"; agent: string } + | { + type: "synthesis-complete"; + consensusScore: number; + summary: string; + } + | { type: "error"; message: string; agent?: string }; + +export interface RoundtableResult { + transcript: RoundtableMessage[]; + synthesis: string; + consensusScore: number; + keyPoints: string[]; + durationMs: number; + roundtableId: string; +} + +// ─── Internals ─────────────────────────────────────────────────────── + +const COMPONENT = "orchestration:roundtable"; + +interface PreparedAgent { + id: string; + role: RoundtableRole; + profile: Profile; + client: AgentClient; + policy: AgentDeliveryPolicy; + delivery: DeliveryState; +} + +function generateSessionId(): string { + return `rt-session-${Date.now().toString(36)}-${Math.random() + 
.toString(36) + .slice(2, 8)}`; +} + +function agentIdentifier(agent: RoundtableAgent, index: number): string { + if (agent.displayName) return agent.displayName; + if (agent.profile?.tool) { + return `${agent.profile.tool}-${index}`; + } + return `agent-${index}`; +} + +function roleInstruction(role: RoundtableRole, topic: string): string { + switch (role) { + case "advocate": + return `You are the ADVOCATE in a roundtable discussion on: "${topic}". Argue the strongest case for taking action. Be concrete and cite evidence.`; + case "critic": + return `You are the CRITIC in a roundtable discussion on: "${topic}". Challenge weak claims. Name the biggest risks the group is glossing over.`; + case "neutral": + return `You are a NEUTRAL participant in a roundtable discussion on: "${topic}". Weigh tradeoffs honestly without advocating a side.`; + case "synthesizer": + return `You are the SYNTHESIZER in a roundtable discussion on: "${topic}". Your job is to summarize the conversation fairly and score consensus.`; + } +} + +function renderTranscript(transcript: RoundtableMessage[]): string { + if (transcript.length === 0) return "(no prior messages)"; + return transcript + .map( + (m) => + `[Round ${m.round}] ${m.agent} (${m.role}):\n${m.content.trim()}`, + ) + .join("\n\n"); +} + +function buildTurnPrompt( + topic: string, + role: RoundtableRole, + round: number, + totalRounds: number, + transcript: RoundtableMessage[], +): string { + return [ + roleInstruction(role, topic), + `This is round ${round} of ${totalRounds}.`, + "Transcript so far:", + renderTranscript(transcript), + "", + "Write your response now. 
Be focused — 3-6 sentences.", + ].join("\n"); +} + +function buildSynthesisPrompt( + topic: string, + transcript: RoundtableMessage[], +): string { + return [ + roleInstruction("synthesizer", topic), + "Summarize the discussion and output ONLY a JSON object with this shape:", + '{ "consensus": , "summary": , "keyPoints": }', + "", + "Transcript:", + renderTranscript(transcript), + "", + "Output JSON now. No prose, no markdown fences.", + ].join("\n"); +} + +async function collectAgentResponse( + client: AgentClient, + prompt: string, + opts: AgentSendOptions, + agentId: string, + onEvent?: (evt: RoundtableEvent) => void, +): Promise<{ content: string; error?: string }> { + const parts: string[] = []; + let error: string | undefined; + try { + for await (const chunk of client.send(prompt, opts)) { + if (onEvent) { + try { + onEvent({ type: "turn-chunk", agent: agentId, chunk }); + } catch { + /* observer errors non-fatal */ + } + } + if (chunk.type === "text") { + parts.push(chunk.content); + } else if (chunk.type === "error") { + error = chunk.message; + } + } + } catch (err) { + error = err instanceof Error ? err.message : String(err); + } + return { content: parts.join(""), error }; +} + +/** + * Tolerant synthesizer parser. + * + * - Finds the first balanced JSON object in the response (strips markdown + * fences, prose prefix/suffix). + * - On failure, returns a neutral 0.5 consensus and the raw text as summary. 
*/
+function parseSynthesis(raw: string): {
+  consensus: number;
+  summary: string;
+  keyPoints: string[];
+} {
+  const fallback = {
+    consensus: 0.5,
+    summary: raw.trim() || "(no synthesizer output)",
+    keyPoints: [] as string[],
+  };
+  if (!raw.trim()) return fallback;
+
+  // Use the first balanced `{…}` span that is valid JSON. Slicing from the
+  // first `{` to the last `}` breaks as soon as the surrounding prose
+  // contains a brace of its own.
+  const parsed = extractFirstJsonObject(raw);
+  if (!parsed) return fallback;
+
+  const {
+    consensus: rawConsensus,
+    summary: rawSummary,
+    keyPoints: rawKeyPoints,
+  } = parsed;
+
+  // Clamp to [0, 1]; anything non-numeric falls back to the neutral 0.5.
+  const consensus =
+    typeof rawConsensus === "number" && Number.isFinite(rawConsensus)
+      ? Math.max(0, Math.min(1, rawConsensus))
+      : 0.5;
+  // A missing or blank summary falls back to the raw response text.
+  const summary =
+    typeof rawSummary === "string" && rawSummary.trim()
+      ? rawSummary
+      : raw.trim();
+  // Keep only non-empty string entries, trimmed.
+  const keyPoints = Array.isArray(rawKeyPoints)
+    ? rawKeyPoints
+        .filter((kp): kp is string => typeof kp === "string")
+        .map((kp) => kp.trim())
+        .filter(Boolean)
+    : [];
+  return { consensus, summary, keyPoints };
+}
+
+/**
+ * Scans `raw` for the first balanced `{…}` span that parses as a JSON
+ * object and returns the parsed value, or `undefined` when none does.
+ *
+ * Brace depth is only counted outside JSON string literals, so braces
+ * inside string values do not end the span early. A balanced span that is
+ * not valid JSON (e.g. `{like this}` in prose) is skipped and the scan
+ * resumes at the next `{`.
+ */
+function extractFirstJsonObject(
+  raw: string,
+): Record<string, unknown> | undefined {
+  for (
+    let start = raw.indexOf("{");
+    start >= 0;
+    start = raw.indexOf("{", start + 1)
+  ) {
+    let depth = 0;
+    let inString = false;
+    let escaped = false;
+    for (let i = start; i < raw.length; i++) {
+      const ch = raw[i];
+      if (inString) {
+        // Inside a string literal only `\` escapes and the closing quote matter.
+        if (escaped) escaped = false;
+        else if (ch === "\\") escaped = true;
+        else if (ch === '"') inString = false;
+        continue;
+      }
+      if (ch === '"') {
+        inString = true;
+      } else if (ch === "{") {
+        depth++;
+      } else if (ch === "}") {
+        depth--;
+        if (depth === 0) {
+          try {
+            const value: unknown = JSON.parse(raw.slice(start, i + 1));
+            if (typeof value === "object" && value !== null) {
+              return value as Record<string, unknown>;
+            }
+          } catch {
+            // Not JSON — try the next `{` in the outer loop.
+          }
+          break;
+        }
+      }
+    }
+  }
+  return undefined;
+}
+
+// ─── Orchestrator ────────────────────────────────────────────────────
+
+export interface RoundtableOrchestratorOptions {
+  /** Optional custom client factory — lets tests inject mocks. */
+  clientFactory?: (profile: Profile) => AgentClient;
+  /** Sleep function — swap for deterministic tests. */
+  sleep?: (ms: number) => Promise<void>;
+  /** Clock override. */
+  now?: () => number;
+}
+
+export class RoundtableOrchestrator {
+  private readonly clientFactory: (profile: Profile) => AgentClient;
+  private readonly sleep: (ms: number) => Promise<void>;
+  private readonly now: () => number;
+
+  constructor(options: RoundtableOrchestratorOptions = {}) {
+    this.clientFactory =
+      options.clientFactory ??
+      ((profile: Profile) => getAgentClientForProfile(profile));
+    this.sleep =
+      options.sleep ?? ((ms) => new Promise((r) => setTimeout(r, ms)));
+    this.now = options.now ??
Date.now;
+  }
+
+  async run(opts: RoundtableRunOptions): Promise<RoundtableResult> {
+    const start = this.now();
+    const rounds = opts.rounds ?? 2;
+    const sessionId = opts.sessionId ?? generateSessionId();
+
+    if (opts.agents.length < 2) {
+      throw new Error(
+        `Roundtable requires at least 2 agents — got ${opts.agents.length}`,
+      );
+    }
+
+    // Prepare agents (forcing worker launchMode).
+    const prepared: PreparedAgent[] = opts.agents.map((agent, i) => {
+      if (agent.virtualRole && !agent.profile) {
+        throw new Error(
+          `Virtual agents not yet supported — assign a profile (agent index ${i}, role "${agent.virtualRole}"). ` +
+            `TODO Phase 5.1: allow role-sharing on one profile.`,
+        );
+      }
+      if (!agent.profile) {
+        throw new Error(
+          `Agent at index ${i} is missing a profile. All v1 roundtable agents need an ARC profile.`,
+        );
+      }
+      const forcedProfile: Profile = { ...agent.profile, launchMode: "worker" };
+      const client = this.clientFactory(forcedProfile);
+      const policy = resolveDeliveryPolicyForTool(forcedProfile.tool);
+      return {
+        id: agentIdentifier(agent, i),
+        role: agent.role,
+        profile: forcedProfile,
+        client,
+        policy,
+        delivery: createDeliveryState(),
+      };
+    });
+
+    // Resolve the synthesizer by its position in `opts.agents` first.
+    // `prepared` is built by mapping `opts.agents`, so indices line up.
+    // Matching on a recomputed id alone picks the wrong agent when two
+    // agents share a tool and have no displayName: `${tool}-${i}` matches
+    // whichever candidate index is tried first.
+    const synthesizerChoice = opts.synthesizer ?? opts.agents[0];
+    const synthesizerIndex = opts.agents.indexOf(synthesizerChoice);
+    const synthesizer =
+      (synthesizerIndex >= 0 ? prepared[synthesizerIndex] : undefined) ??
+      // Not the same object as any entry in `agents` — fall back to id match.
+      prepared.find(
+        (p, i) => p.id === agentIdentifier(synthesizerChoice, i),
+      ) ??
+      prepared.find((p) => p.role === "synthesizer") ??
+      prepared[0];
+
+    // Build a dedicated HookBus + state store so the orchestrator doesn't
+    // clobber any externally-managed pipeline.
+    const stateStore = new HookStateStore();
+    const hookBus = new HookBus();
+    const hook = createRoundtableHook(stateStore, { defaultRounds: rounds });
+    hookBus.register(hook, { enabled: true });
+
+    // Fire the trigger to initialize hook state. We use the first agent's
+ // Compose the trigger so agents: stays on its own line (the hook's + // parser greedy-matches to end-of-line, so we terminate with \n). + const triggerMessage = [ + `@roundtable rounds: ${rounds}`, + `agents: ${prepared.map((p) => p.id).join(", ")}`, + opts.topic, + ].join("\n"); + + const baseCtx = (adapter: string, message: string): HookContext => ({ + message, + sessionId, + profile: prepared[0].profile, + adapter, + hookMetadata: {}, + }); + + await hookBus.runPre( + baseCtx(prepared[0].id, triggerMessage), + "log", + "pre-message", + ); + + const state = stateStore.get( + sessionId, + "roundtable", + "roundtableState", + ); + if (!state) { + throw new Error( + "Roundtable hook failed to initialize — no state after trigger", + ); + } + + const transcript: RoundtableMessage[] = []; + const totalTurns = rounds * prepared.length; + let turnsCompleted = 0; + + opts.onEvent?.({ type: "phase-change", status: state.status }); + + // Main loop — drive the hook's state machine. + while (turnsCompleted < totalTurns) { + if (opts.signal?.aborted) { + throw new Error("Roundtable aborted"); + } + + const live = stateStore.get( + sessionId, + "roundtable", + "roundtableState", + ); + if (!live || live.status !== "active") break; + + const turnIndex = live.currentTurnIndex; + const round = live.currentRound; + const agent = prepared[turnIndex]; + if (!agent) { + throw new Error( + `Turn index ${turnIndex} out of range (agents=${prepared.length})`, + ); + } + + opts.onEvent?.({ + type: "turn-start", + agent: agent.id, + role: agent.role, + round, + turnIndex, + }); + + const prompt = buildTurnPrompt( + opts.topic, + agent.role, + round, + rounds, + transcript, + ); + + const turnStart = this.now(); + const { content, error } = await collectAgentResponse( + agent.client, + prompt, + { signal: opts.signal }, + agent.id, + opts.onEvent, + ); + const latencyMs = this.now() - turnStart; + + if (error) { + opts.onEvent?.({ type: "error", message: error, agent: agent.id }); + 
writeLogEvent({ + level: "warn", + component: COMPONENT, + action: "turn-error", + message: `Agent '${agent.id}' errored during turn: ${error}`, + data: { sessionId, round, turnIndex }, + }); + } + + transcript.push({ + agent: agent.id, + role: agent.role, + round, + content, + createdAt: this.now(), + latencyMs, + }); + + // Feed response back into the hook so state advances. + const response: AgentResponse = { content }; + await hookBus.runPost( + baseCtx(agent.id, prompt), + response, + "log", + "post-message", + ); + + opts.onEvent?.({ + type: "turn-complete", + agent: agent.id, + round, + latencyMs, + content, + }); + + // Update adaptive pacing and sleep. + agent.delivery = updateReplyLatencyAverage(agent.delivery, latencyMs); + const graceMs = computeAdaptiveGraceMs(agent.policy, agent.delivery); + + turnsCompleted++; + const stateAfter = stateStore.get( + sessionId, + "roundtable", + "roundtableState", + ); + if (stateAfter && stateAfter.status !== "active") { + opts.onEvent?.({ type: "phase-change", status: stateAfter.status }); + } + + if (turnsCompleted < totalTurns && graceMs > 0) { + await this.sleep(graceMs); + } + } + + // Synthesis. + opts.onEvent?.({ type: "synthesis-start", agent: synthesizer.id }); + + const synthesisPrompt = buildSynthesisPrompt(opts.topic, transcript); + const { content: rawSynthesis, error: synthesisError } = + await collectAgentResponse( + synthesizer.client, + synthesisPrompt, + { signal: opts.signal }, + synthesizer.id, + opts.onEvent, + ); + if (synthesisError) { + opts.onEvent?.({ + type: "error", + message: synthesisError, + agent: synthesizer.id, + }); + } + + const parsed = parseSynthesis(rawSynthesis); + + // Feed synthesis back through the hook so status flips to "complete". 
+ await hookBus.runPost( + baseCtx(synthesizer.id, synthesisPrompt), + { content: rawSynthesis }, + "log", + "post-message", + ); + + opts.onEvent?.({ + type: "synthesis-complete", + consensusScore: parsed.consensus, + summary: parsed.summary, + }); + opts.onEvent?.({ type: "phase-change", status: "complete" }); + + return { + transcript, + synthesis: parsed.summary, + consensusScore: parsed.consensus, + keyPoints: parsed.keyPoints, + durationMs: this.now() - start, + roundtableId: state.id, + }; + } +} diff --git a/packages/core/src/orchestration/staged-workflow.ts b/packages/core/src/orchestration/staged-workflow.ts new file mode 100644 index 0000000..7b62116 --- /dev/null +++ b/packages/core/src/orchestration/staged-workflow.ts @@ -0,0 +1,265 @@ +// Ported from agent-forge/lib/staged-workflow.ts — see docs/plans/ai-and-roundtable.md AD-7 +/** + * Staged workflow — PLAN → EXEC → VERIFY state machine. + * + * The manager advances one phase at a time. To leave a phase, every + * participating agent must have posted a message matching that phase's + * completion regex set. If the phase timeout expires first, the manager + * advances anyway and records the timeout in the transcript. + * + * The message bus is dependency-injected so the same manager can drive an + * in-memory test stub or a real bus implementation in production. + */ + +// ─── Phase types ───────────────────────────────────────────────────── + +export type StagedPhase = "plan" | "exec" | "verify"; + +export type StagedWorkflowTerminal = "complete" | "aborted"; + +// ─── Defaults ──────────────────────────────────────────────────────── + +const DEFAULT_PLAN_TIMEOUT_MS = 120_000; +const DEFAULT_EXEC_TIMEOUT_MS = 300_000; +const DEFAULT_VERIFY_TIMEOUT_MS = 120_000; +const DEFAULT_POLL_INTERVAL_MS = 50; + +/** + * Default completion regexes — ported from Agent-Forge. Each phase + * requires at least one match per agent to advance. 
+ */ +export const DEFAULT_COMPLETION_PATTERNS: Record = { + plan: [ + /\bplan\b/i, + /\bstrateg/i, + /\bapproach\b/i, + /\bready\b/i, + /\bplan\s+shared\b/i, + ], + exec: [ + /\bdone\b/i, + /\bcomplete(?:d)?\b/i, + /\bfinished\b/i, + /\bimplemented\b/i, + /\bexec_done\b/i, + ], + verify: [ + /\bverify(?:_ok)?\b/i, + /\bverified\b/i, + /\bapproved\b/i, + /\breview\s+complete\b/i, + ], +}; + +// ─── Public config ─────────────────────────────────────────────────── + +export interface StagedWorkflowConfig { + /** Ordered phases to run. Defaults to the full PLAN → EXEC → VERIFY cycle. */ + phases?: StagedPhase[]; + /** Per-phase timeout. Missing entries fall back to per-phase defaults. */ + phaseTimeoutMs?: Partial>; + /** Per-phase completion patterns. Overrides defaults for that phase. */ + completionPatterns?: Partial>; + /** How often to poll the message bus while waiting for completion. */ + pollIntervalMs?: number; + /** Called whenever the phase changes. Purely observational. */ + onPhaseChange?: (phase: StagedPhase | StagedWorkflowTerminal) => void; +} + +// ─── Messages & bus ────────────────────────────────────────────────── + +/** Minimal message shape understood by StagedWorkflowManager. */ +export interface StagedMessage { + id: string; + from: string; + content: string; + phase?: StagedPhase; + createdAt: number; +} + +export interface MessageBusReadResult { + messages: StagedMessage[]; + cursor: number; +} + +/** Cursor-based message bus contract the manager depends on. */ +export interface StagedMessageBus { + getMessages(cursor: number): Promise; + post(msg: StagedMessage): Promise; +} + +/** Trivial in-memory bus for tests + default runs. 
*/ +export class InMemoryMessageBus implements StagedMessageBus { + private readonly messages: StagedMessage[] = []; + + async getMessages(cursor: number): Promise { + const messages = this.messages.slice(cursor); + return { messages, cursor: this.messages.length }; + } + + async post(msg: StagedMessage): Promise { + this.messages.push(msg); + } + + /** Test-only snapshot. */ + all(): StagedMessage[] { + return [...this.messages]; + } +} + +// ─── Manager ───────────────────────────────────────────────────────── + +export interface StagedWorkflowManagerDeps { + messageBus: StagedMessageBus; + allAgents: string[]; + /** Override Date.now() for deterministic tests. */ + now?: () => number; + /** Override setTimeout-based sleep for deterministic tests. */ + sleep?: (ms: number) => Promise; +} + +export interface StagedWorkflowResult { + phase: StagedWorkflowTerminal; + transcript: StagedMessage[]; + durationMs: number; + phasesCompleted: StagedPhase[]; + phasesTimedOut: StagedPhase[]; +} + +export class StagedWorkflowManager { + private readonly phases: StagedPhase[]; + private readonly phaseTimeoutMs: Record; + private readonly completionPatterns: Record; + private readonly pollIntervalMs: number; + private readonly onPhaseChange?: ( + phase: StagedPhase | StagedWorkflowTerminal, + ) => void; + private readonly bus: StagedMessageBus; + private readonly allAgents: string[]; + private readonly now: () => number; + private readonly sleep: (ms: number) => Promise; + + private cursor = 0; + private transcript: StagedMessage[] = []; + private currentPhase: StagedPhase | StagedWorkflowTerminal | null = null; + + constructor(config: StagedWorkflowConfig, deps: StagedWorkflowManagerDeps) { + this.phases = + config.phases && config.phases.length > 0 + ? 
[...config.phases] + : (["plan", "exec", "verify"] as StagedPhase[]); + + const timeoutDefaults: Record = { + plan: DEFAULT_PLAN_TIMEOUT_MS, + exec: DEFAULT_EXEC_TIMEOUT_MS, + verify: DEFAULT_VERIFY_TIMEOUT_MS, + }; + this.phaseTimeoutMs = { + plan: config.phaseTimeoutMs?.plan ?? timeoutDefaults.plan, + exec: config.phaseTimeoutMs?.exec ?? timeoutDefaults.exec, + verify: config.phaseTimeoutMs?.verify ?? timeoutDefaults.verify, + }; + + this.completionPatterns = { + plan: + config.completionPatterns?.plan ?? DEFAULT_COMPLETION_PATTERNS.plan, + exec: + config.completionPatterns?.exec ?? DEFAULT_COMPLETION_PATTERNS.exec, + verify: + config.completionPatterns?.verify ?? DEFAULT_COMPLETION_PATTERNS.verify, + }; + + this.pollIntervalMs = config.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS; + this.onPhaseChange = config.onPhaseChange; + this.bus = deps.messageBus; + this.allAgents = [...deps.allAgents]; + this.now = deps.now ?? Date.now; + this.sleep = + deps.sleep ?? ((ms: number) => new Promise((r) => setTimeout(r, ms))); + } + + /** Current phase (or `null` before `run()` starts). 
*/
+  getCurrentPhase(): StagedPhase | StagedWorkflowTerminal | null {
+    return this.currentPhase;
+  }
+
+  /**
+   * Runs the configured phases in order. A phase ends when every agent has
+   * a message matching one of that phase's completion patterns, or when
+   * its timeout elapses (recorded in `phasesTimedOut`).
+   *
+   * If `abort()` is called before or during the run, polling stops at the
+   * next check and the result reports `phase: "aborted"`. The interrupted
+   * phase is recorded as neither completed nor timed out.
+   */
+  async run(): Promise<StagedWorkflowResult> {
+    const start = this.now();
+    const phasesCompleted: StagedPhase[] = [];
+    const phasesTimedOut: StagedPhase[] = [];
+
+    for (const phase of this.phases) {
+      // Check before setPhase(), which would otherwise overwrite "aborted".
+      if (this.isAborted()) break;
+      this.setPhase(phase);
+      this.resetCursor();
+
+      const phaseStart = this.now();
+      const timeoutMs = this.phaseTimeoutMs[phase];
+      const patterns = this.completionPatterns[phase];
+
+      let advanced = false;
+      while (!this.isAborted() && this.now() - phaseStart < timeoutMs) {
+        const read = await this.bus.getMessages(this.cursor);
+        if (read.messages.length > 0) {
+          this.transcript.push(...read.messages);
+          this.cursor = read.cursor;
+        }
+        if (this.allAgentsMatched(patterns)) {
+          advanced = true;
+          break;
+        }
+        await this.sleep(this.pollIntervalMs);
+      }
+
+      if (this.isAborted()) break;
+
+      if (advanced) {
+        phasesCompleted.push(phase);
+      } else {
+        phasesTimedOut.push(phase);
+      }
+    }
+
+    if (this.isAborted()) {
+      return {
+        phase: "aborted",
+        transcript: [...this.transcript],
+        durationMs: this.now() - start,
+        phasesCompleted,
+        phasesTimedOut,
+      };
+    }
+
+    this.setPhase("complete");
+    return {
+      phase: "complete",
+      transcript: [...this.transcript],
+      durationMs: this.now() - start,
+      phasesCompleted,
+      phasesTimedOut,
+    };
+  }
+
+  /**
+   * Abort the workflow. An in-flight `run()` stops at its next poll and
+   * resolves with `phase: "aborted"` (future: signal-based cancellation).
+   */
+  abort(): void {
+    this.setPhase("aborted");
+  }
+
+  /** True once `abort()` has moved the manager into the "aborted" phase. */
+  private isAborted(): boolean {
+    return this.currentPhase === "aborted";
+  }
+
+  // ── Internal ─────────────────────────────────────────────────────
+
+  private resetCursor(): void {
+    // Start each phase from scratch: drop the accumulated transcript and
+    // rewind the cursor to 0, so the next poll re-reads the bus from the
+    // beginning.
+    // NOTE(review): this means messages posted during earlier phases are
+    // re-read and count toward the current phase's completion patterns.
+    // Confirm that is intended before restricting matching to messages
+    // posted within the phase.
+    this.cursor = 0;
+    this.transcript = [];
+  }
+
+  private setPhase(next: StagedPhase | StagedWorkflowTerminal): void {
+    if (this.currentPhase === next) return;
+    this.currentPhase = next;
+    try {
+      this.onPhaseChange?.(next);
+    } catch {
+      // Observer errors must never derail the workflow.
+ } + } + + private allAgentsMatched(patterns: RegExp[]): boolean { + if (this.allAgents.length === 0) return false; + return this.allAgents.every((agent) => + this.transcript.some( + (msg) => + msg.from === agent && + patterns.some((re) => re.test(msg.content)), + ), + ); + } +} diff --git a/packages/core/src/orchestration/watchdog.ts b/packages/core/src/orchestration/watchdog.ts new file mode 100644 index 0000000..2e9cb6b --- /dev/null +++ b/packages/core/src/orchestration/watchdog.ts @@ -0,0 +1,222 @@ +// Ported from agent-forge/lib/agent-watchdog.ts — see docs/plans/ai-and-roundtable.md AD-7 +/** + * Stall detection for orchestrated agents. + * + * Protocol (per Agent-Forge): + * - 3 min of no progress → NUDGE (ask the agent to post concrete progress). + * - 5 min after nudge → STALL (emit a decision with kill / wait options). + * + * Unlike Agent-Forge's watchdog, this port is pure: it takes injected + * dependencies in the constructor and exposes a `tick()` method callers + * invoke on their own schedule. No timers, no I/O — friendly to tests. + */ + +// ─── Constants ─────────────────────────────────────────────────────── + +export const WATCHDOG_NUDGE_AFTER_MS = 180_000; +export const WATCHDOG_STALL_AFTER_MS = 300_000; +export const WATCHDOG_NUDGE_TEXT = + "Are you still working? Share concrete progress. 
If no active task remains, mark yourself done instead of repeating an idle status update."; + +export const WATCHDOG_STALL_OPTIONS = [ + "Kill agent", + "Wait 5 min", + "Wait 15 min", +] as const; + +// ─── Low-signal detection (ported verbatim) ────────────────────────── + +const LOW_SIGNAL_STATUS_INDICATORS = [ + /\bno active\b.*\b(?:implementation|coding|debugging|investigation|review|task|work)\b.*\b(?:in progress|underway)\b/iu, + /\b(?:conversation|thread)\s+remains\s+idle\b/iu, + /\b(?:awaiting|waiting for)\s+(?:a\s+)?(?:concrete|specific)\s+(?:workspace\s+)?task\b/iu, + /\buser-requested status relays?\b/iu, + /\bno active implementation\b/iu, + /\bno active coding\b/iu, +]; + +const LOW_SIGNAL_SHORT_STATUS_PATTERNS = [ + /^still working[.!]?$/iu, + /^working on it[.!]?$/iu, + /^investigating[.!]?$/iu, + /^looking into it[.!]?$/iu, + /^status update[.:!]?$/iu, + /^progress update[.:!]?$/iu, +]; + +function normalize(content: string): string { + return content.replace(/\s+/g, " ").trim().toLowerCase(); +} + +/** + * Heuristic — returns true if the given message is a "I'm idle, nothing to + * report" reply. The watchdog treats such replies as non-progress so it keeps + * nudging/stalling instead of resetting the timer. 
+ */ +export function isLowSignalWatchdogReply(content: string): boolean { + const normalized = normalize(content); + if (!normalized) return true; + if (LOW_SIGNAL_SHORT_STATUS_PATTERNS.some((p) => p.test(normalized))) { + return true; + } + const indicatorCount = LOW_SIGNAL_STATUS_INDICATORS.filter((p) => + p.test(normalized), + ).length; + if (indicatorCount >= 2) return true; + return ( + /^(?:current|latest|thread)\s+(?:status|progress)/iu.test(normalized) && + indicatorCount >= 1 + ); +} + +// ─── Event + dep types ─────────────────────────────────────────────── + +export type WatchdogEventType = "nudge" | "stall" | "decision"; + +export interface WatchdogEvent { + type: WatchdogEventType; + agentId: string; + timestamp: number; + text?: string; + options?: readonly string[]; +} + +export interface AgentWatchdogDeps { + /** True iff the agent is currently considered active (not stalled/exited). */ + isAgentActive: (agentId: string) => boolean; + /** Epoch ms of the agent's last substantive message. */ + getLastMessageAt: (agentId: string) => number; + /** Deliver a decision prompt to the operator (kill / wait). */ + postDecision: (agentId: string, options: readonly string[]) => void; + /** Deliver a nudge prompt to the agent. */ + postNudge?: (agentId: string, text: string) => void; + /** Observer for all lifecycle events. */ + onEvent?: (evt: WatchdogEvent) => void; + /** Override for tests. */ + now?: () => number; + /** Override default thresholds. */ + nudgeAfterMs?: number; + stallAfterMs?: number; +} + +interface AgentTrackedState { + nudgedAt?: number; + stalledAt?: number; +} + +// ─── Watchdog ──────────────────────────────────────────────────────── + +export class AgentWatchdog { + private readonly tracked = new Map(); + private readonly now: () => number; + private readonly nudgeAfterMs: number; + private readonly stallAfterMs: number; + + constructor(private readonly deps: AgentWatchdogDeps) { + this.now = deps.now ?? 
Date.now;
+    this.nudgeAfterMs = deps.nudgeAfterMs ?? WATCHDOG_NUDGE_AFTER_MS;
+    this.stallAfterMs = deps.stallAfterMs ?? WATCHDOG_STALL_AFTER_MS;
+  }
+
+  /** Start tracking an agent. Idempotent. */
+  track(agentId: string): void {
+    if (!this.tracked.has(agentId)) {
+      this.tracked.set(agentId, {});
+    }
+  }
+
+  /** Stop tracking an agent entirely. */
+  forget(agentId: string): void {
+    this.tracked.delete(agentId);
+  }
+
+  /** Clear nudge/stall markers — e.g., on substantive progress. */
+  reset(agentId: string): void {
+    if (this.tracked.has(agentId)) {
+      this.tracked.set(agentId, {});
+    }
+  }
+
+  /**
+   * Run one evaluation pass across all tracked agents. Emits nudges and
+   * stalls via the injected callbacks.
+   *
+   * Per agent, at most one step happens per tick: a nudge once idle time
+   * reaches `nudgeAfterMs`, then on a later tick a stall plus decision
+   * once `stallAfterMs` has elapsed since the nudge. Inactive agents and
+   * agents with a non-finite last-message time are skipped.
+   *
+   * @returns the events emitted during this pass, in emission order.
+   */
+  tick(): WatchdogEvent[] {
+    const emitted: WatchdogEvent[] = [];
+    const now = this.now();
+
+    for (const [agentId, state] of this.tracked.entries()) {
+      if (!this.deps.isAgentActive(agentId)) continue;
+
+      const lastAt = this.deps.getLastMessageAt(agentId);
+      if (!Number.isFinite(lastAt)) continue;
+
+      const idleMs = now - lastAt;
+
+      // Nudge at threshold (once). Compare against `undefined`, not
+      // truthiness: an injected clock may legitimately report 0.
+      if (state.nudgedAt === undefined && idleMs >= this.nudgeAfterMs) {
+        state.nudgedAt = now;
+        try {
+          this.deps.postNudge?.(agentId, WATCHDOG_NUDGE_TEXT);
+        } catch {
+          // Nudge delivery failure must not halt the watchdog.
+        }
+        const evt: WatchdogEvent = {
+          type: "nudge",
+          agentId,
+          timestamp: now,
+          text: WATCHDOG_NUDGE_TEXT,
+        };
+        this.safeEmit(evt);
+        emitted.push(evt);
+        continue;
+      }
+
+      // Stall + decision, after nudge, once the stall threshold passes.
+      if (state.nudgedAt !== undefined && state.stalledAt === undefined) {
+        const sinceNudge = now - state.nudgedAt;
+        if (sinceNudge >= this.stallAfterMs) {
+          state.stalledAt = now;
+          const stallEvt: WatchdogEvent = {
+            type: "stall",
+            agentId,
+            timestamp: now,
+          };
+          const decisionEvt: WatchdogEvent = {
+            type: "decision",
+            agentId,
+            timestamp: now,
+            options: WATCHDOG_STALL_OPTIONS,
+          };
+          try {
+            this.deps.postDecision(agentId, WATCHDOG_STALL_OPTIONS);
+          } catch {
+            // Decision delivery failure must not halt the watchdog.
+          }
+          this.safeEmit(stallEvt);
+          this.safeEmit(decisionEvt);
+          emitted.push(stallEvt, decisionEvt);
+        }
+      }
+    }
+
+    return emitted;
+  }
+
+  /** Snapshot of internal state — mostly for tests. */
+  snapshot(): Record<string, AgentTrackedState> {
+    const out: Record<string, AgentTrackedState> = {};
+    for (const [k, v] of this.tracked.entries()) {
+      out[k] = { ...v };
+    }
+    return out;
+  }
+
+  private safeEmit(evt: WatchdogEvent): void {
+    try {
+      this.deps.onEvent?.(evt);
+    } catch {
+      // Observer errors must not halt the watchdog.
+    }
+  }
+}
diff --git a/tests/integration/chat-cli.test.ts b/tests/integration/chat-cli.test.ts
new file mode 100644
index 0000000..f2b0b38
--- /dev/null
+++ b/tests/integration/chat-cli.test.ts
@@ -0,0 +1,176 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+// ---------------------------------------------------------------------------
+// Mock `@axiom-labs/arc-core` — we override `getAgentClientForProfile` to
+// return a stub client that yields a scripted AgentChunk stream. Everything
+// else (ChatSession, ToolRegistry, runAgent, buildSystemPrompt, config
+// helpers) comes from the real core module.
+//
+// writeLogEvent is stubbed so store ops don't race on an empty logs dir.
+// --------------------------------------------------------------------------- + +vi.mock("@axiom-labs/arc-core", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + writeLogEvent: vi.fn(), + getAgentClientForProfile: vi.fn(), + }; +}); + +import { + getAgentClientForProfile, + loadSession, + listSessions, + type AgentChunk, + type AgentClient, +} from "@axiom-labs/arc-core"; +import type { ArcConfig } from "../../packages/core/src/types.js"; +import { handleChat } from "../../packages/cli/src/commands/chat.js"; + +// --------------------------------------------------------------------------- +// Per-test filesystem isolation +// --------------------------------------------------------------------------- + +let arcDir: string; +let prevArcDir: string | undefined; +const profileName = "test-profile"; + +function writeConfig(cfg: ArcConfig): void { + fs.mkdirSync(arcDir, { recursive: true }); + fs.writeFileSync(path.join(arcDir, "config.json"), JSON.stringify(cfg, null, 2), "utf-8"); +} + +function scriptedClient(chunks: AgentChunk[]): AgentClient { + const sendFn = vi.fn(async function* (_prompt: string) { + for (const c of chunks) yield c; + }); + return { + send: sendFn as unknown as AgentClient["send"], + shutdown: vi.fn(async () => {}), + }; +} + +beforeEach(() => { + arcDir = fs.mkdtempSync(path.join(os.tmpdir(), "arc-chat-cli-test-")); + prevArcDir = process.env["ARC_DIR"]; + process.env["ARC_DIR"] = arcDir; + fs.mkdirSync(path.join(arcDir, "profiles", profileName), { recursive: true }); + fs.mkdirSync(path.join(arcDir, "logs"), { recursive: true }); + writeConfig({ + version: 1, + activeProfile: profileName, + profiles: { + [profileName]: { + authType: "oauth", + tool: "claude", + configDir: path.join(arcDir, "profiles", profileName), + createdAt: "2026-01-01T00:00:00Z", + }, + }, + }); + vi.mocked(getAgentClientForProfile).mockReset(); +}); + +afterEach(() => { + if (prevArcDir === undefined) delete 
process.env["ARC_DIR"]; + else process.env["ARC_DIR"] = prevArcDir; + try { + fs.rmSync(arcDir, { recursive: true, force: true }); + } catch { + /* ignore */ + } +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("arc chat --once (one-shot)", () => { + it("streams text and terminates on done, persisting the session", async () => { + const client = scriptedClient([ + { type: "text", content: "Hello" }, + { type: "text", content: " world." }, + { type: "done", reason: "end_turn" }, + ]); + vi.mocked(getAgentClientForProfile).mockReturnValue(client); + + // Capture stdout. + const writes: string[] = []; + const origWrite = process.stdout.write.bind(process.stdout); + process.stdout.write = ((chunk: string | Uint8Array) => { + writes.push(typeof chunk === "string" ? chunk : chunk.toString()); + return true; + }) as typeof process.stdout.write; + + try { + await handleChat({ once: "hi", mode: "supervised" }); + } finally { + process.stdout.write = origWrite; + } + + const out = writes.join(""); + expect(out).toContain("Hello"); + expect(out).toContain("world."); + + const sessions = listSessions(profileName); + expect(sessions.length).toBe(1); + + const session = loadSession(profileName, sessions[0].id); + // system seed + user + assistant + expect(session.messages.length).toBe(3); + const roles = session.messages.map((m) => m.role); + expect(roles).toEqual(["system", "user", "assistant"]); + expect(session.messages[1].content).toBe("hi"); + expect(session.messages[2].content).toContain("Hello world."); + }); + + it("dispatches a list_profiles tool_call, executes it, streams the result, and records it on the assistant message", async () => { + const client = scriptedClient([ + { type: "text", content: "Looking up profiles..." 
}, + { + type: "tool_call", + id: "tc-1", + tool: "list_profiles", + input: {}, + }, + { type: "done", reason: "end_turn" }, + ]); + vi.mocked(getAgentClientForProfile).mockReturnValue(client); + + const writes: string[] = []; + const origWrite = process.stdout.write.bind(process.stdout); + process.stdout.write = ((chunk: string | Uint8Array) => { + writes.push(typeof chunk === "string" ? chunk : chunk.toString()); + return true; + }) as typeof process.stdout.write; + + try { + // autonomous so the read-only `list_profiles` tool doesn't need confirmation + // (it's a read tool so it wouldn't anyway, but this makes intent explicit) + await handleChat({ once: "list my profiles", mode: "autonomous" }); + } finally { + process.stdout.write = origWrite; + } + + const out = writes.join(""); + expect(out).toContain("tool:list_profiles"); + expect(out).toContain("result"); + // list_profiles serialises the profile into its output; our profile name should appear + expect(out).toContain(profileName); + + const sessions = listSessions(profileName); + expect(sessions.length).toBe(1); + + const session = loadSession(profileName, sessions[0].id); + const assistant = session.messages.find((m) => m.role === "assistant"); + expect(assistant).toBeDefined(); + expect(assistant?.toolCalls).toBeDefined(); + expect(assistant?.toolCalls?.[0]?.name).toBe("list_profiles"); + expect(assistant?.toolCalls?.[0]?.id).toBe("tc-1"); + expect(assistant?.toolCalls?.[0]?.result).toBeDefined(); + }); +}); diff --git a/tests/unit/chat/session.test.ts b/tests/unit/chat/session.test.ts new file mode 100644 index 0000000..46d47fb --- /dev/null +++ b/tests/unit/chat/session.test.ts @@ -0,0 +1,127 @@ +import { describe, it, expect } from "vitest"; +import { ChatSession } from "../../../packages/core/src/chat/session.js"; + +describe("ChatSession", () => { + it("constructs with defaults and generates an id", () => { + const s = new ChatSession({ profileName: "p", permissionMode: "supervised" }); + 
expect(s.id).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}/); + expect(s.profileName).toBe("p"); + expect(s.messages).toEqual([]); + expect(s.permissionMode).toBe("supervised"); + expect(typeof s.createdAt).toBe("string"); + }); + + it("appends messages and updates updatedAt", async () => { + const s = new ChatSession({ profileName: "p", permissionMode: "read-only" }); + const firstUpdated = s.updatedAt; + // ensure clock advances + await new Promise((r) => setTimeout(r, 5)); + s.append({ role: "user", content: "hi" }); + expect(s.messages.length).toBe(1); + expect(s.messages[0].role).toBe("user"); + expect(s.messages[0].content).toBe("hi"); + expect(typeof s.messages[0].timestamp).toBe("string"); + expect(s.updatedAt).not.toBe(firstUpdated); + }); + + it("summary() returns (empty session) when no user message", () => { + const s = new ChatSession({ profileName: "p", permissionMode: "autonomous" }); + expect(s.summary()).toBe("(empty session)"); + s.append({ role: "assistant", content: "hi" }); + expect(s.summary()).toBe("(empty session)"); + }); + + it("summary() truncates first user message to 60 chars", () => { + const s = new ChatSession({ profileName: "p", permissionMode: "supervised" }); + s.append({ role: "user", content: "a".repeat(120) }); + const sum = s.summary(); + expect(sum.length).toBe(60); + expect(sum.endsWith("...")).toBe(true); + }); + + it("summary() collapses whitespace", () => { + const s = new ChatSession({ profileName: "p", permissionMode: "supervised" }); + s.append({ role: "user", content: "hello world\nline2" }); + expect(s.summary()).toBe("hello world line2"); + }); + + it("serialize/load roundtrips losslessly", () => { + const s = new ChatSession({ profileName: "p", permissionMode: "supervised" }); + s.append({ role: "user", content: "hello" }); + s.append({ + role: "assistant", + content: "hi back", + toolCalls: [ + { + id: "t1", + name: "list_profiles", + input: {}, + result: { profiles: [] }, + }, + ], + }); + s.append({ role: "tool", content: 
"done", toolCallId: "t1" }); + + const json = s.serialize(); + const revived = ChatSession.load(JSON.parse(JSON.stringify(json))); + expect(revived.id).toBe(s.id); + expect(revived.profileName).toBe(s.profileName); + expect(revived.permissionMode).toBe(s.permissionMode); + expect(revived.messages.length).toBe(3); + expect(revived.messages[1].toolCalls?.[0]?.name).toBe("list_profiles"); + expect(revived.messages[2].toolCallId).toBe("t1"); + }); + + it("load() rejects non-object JSON", () => { + expect(() => ChatSession.load(null)).toThrow(/not an object/); + expect(() => ChatSession.load(42)).toThrow(/not an object/); + }); + + it("load() rejects missing id / profileName", () => { + expect(() => ChatSession.load({ profileName: "p", permissionMode: "supervised", messages: [] })).toThrow(/missing id/); + expect(() => + ChatSession.load({ id: "x", permissionMode: "supervised", messages: [] }), + ).toThrow(/missing profileName/); + }); + + it("load() rejects unknown permissionMode", () => { + expect(() => + ChatSession.load({ id: "x", profileName: "p", permissionMode: "loose", messages: [] }), + ).toThrow(/bad permissionMode/); + }); + + it("load() rejects non-array messages", () => { + expect(() => + ChatSession.load({ id: "x", profileName: "p", permissionMode: "supervised", messages: "nope" }), + ).toThrow(/must be an array/); + }); + + it("load() rejects malformed message entries", () => { + expect(() => + ChatSession.load({ + id: "x", + profileName: "p", + permissionMode: "supervised", + messages: [{ role: "x", content: "hi", timestamp: "2026-01-01" }], + }), + ).toThrow(/role is invalid/); + + expect(() => + ChatSession.load({ + id: "x", + profileName: "p", + permissionMode: "supervised", + messages: [{ role: "user", content: 42, timestamp: "2026-01-01" }], + }), + ).toThrow(/content must be a string/); + + expect(() => + ChatSession.load({ + id: "x", + profileName: "p", + permissionMode: "supervised", + messages: [{ role: "user", content: "hi", timestamp: 123 }], 
+ }), + ).toThrow(/timestamp must be a string/); + }); +}); diff --git a/tests/unit/chat/store.test.ts b/tests/unit/chat/store.test.ts new file mode 100644 index 0000000..8c14e56 --- /dev/null +++ b/tests/unit/chat/store.test.ts @@ -0,0 +1,122 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { ChatSession } from "../../../packages/core/src/chat/session.js"; +import { + saveSession, + loadSession, + listSessions, + deleteSession, + getChatSessionsDir, + getChatSessionPath, +} from "../../../packages/core/src/chat/store.js"; + +let tmpDir: string; +let prevEnv: string | undefined; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "arc-chat-store-")); + prevEnv = process.env["ARC_DIR"]; + process.env["ARC_DIR"] = tmpDir; +}); + +afterEach(() => { + if (prevEnv === undefined) delete process.env["ARC_DIR"]; + else process.env["ARC_DIR"] = prevEnv; + try { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + /* ignore */ + } +}); + +describe("chat store", () => { + it("getChatSessionsDir is under ARC_DIR/profiles/", () => { + const dir = getChatSessionsDir("work"); + expect(dir).toBe(path.join(tmpDir, "profiles", "work", "chat-sessions")); + }); + + it("rejects invalid session ids in path helper", () => { + expect(() => getChatSessionPath("work", "../escape")).toThrow(/Invalid chat session id/); + expect(() => getChatSessionPath("work", "x/y")).toThrow(/Invalid chat session id/); + }); + + it("saves and loads a session round-trip", () => { + const s = new ChatSession({ profileName: "work", permissionMode: "supervised" }); + s.append({ role: "user", content: "hello" }); + s.append({ role: "assistant", content: "hi" }); + + saveSession(s); + + const filePath = getChatSessionPath("work", s.id); + expect(fs.existsSync(filePath)).toBe(true); + + const loaded = loadSession("work", s.id); + expect(loaded.id).toBe(s.id); + 
expect(loaded.profileName).toBe("work"); + expect(loaded.messages).toHaveLength(2); + expect(loaded.messages[0].role).toBe("user"); + }); + + it("listSessions returns empty when dir doesn't exist", () => { + expect(listSessions("nope")).toEqual([]); + }); + + it("listSessions returns summaries sorted by updatedAt desc", async () => { + const s1 = new ChatSession({ profileName: "work", permissionMode: "supervised" }); + s1.append({ role: "user", content: "first" }); + saveSession(s1); + + await new Promise((r) => setTimeout(r, 10)); + + const s2 = new ChatSession({ profileName: "work", permissionMode: "supervised" }); + s2.append({ role: "user", content: "second" }); + saveSession(s2); + + const list = listSessions("work"); + expect(list.length).toBe(2); + expect(list[0].id).toBe(s2.id); + expect(list[1].id).toBe(s1.id); + expect(list[0].summary).toBe("second"); + expect(list[0].messageCount).toBe(1); + }); + + it("listSessions skips malformed files", () => { + const dir = getChatSessionsDir("work"); + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(dir, "broken.json"), "not valid json", "utf-8"); + const s = new ChatSession({ profileName: "work", permissionMode: "supervised" }); + s.append({ role: "user", content: "ok" }); + saveSession(s); + const list = listSessions("work"); + expect(list).toHaveLength(1); + expect(list[0].id).toBe(s.id); + }); + + it("deleteSession removes file; no-op if missing", () => { + const s = new ChatSession({ profileName: "work", permissionMode: "supervised" }); + s.append({ role: "user", content: "hi" }); + saveSession(s); + const p = getChatSessionPath("work", s.id); + expect(fs.existsSync(p)).toBe(true); + deleteSession("work", s.id); + expect(fs.existsSync(p)).toBe(false); + // no-op second time + expect(() => deleteSession("work", s.id)).not.toThrow(); + }); + + it("loadSession throws for missing files", () => { + expect(() => loadSession("work", "00000000-0000-0000-0000-000000000000")).toThrow( + /not found/, 
+ ); + }); + + it("loadSession throws for malformed JSON", () => { + const dir = getChatSessionsDir("work"); + fs.mkdirSync(dir, { recursive: true }); + const id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; + fs.writeFileSync(path.join(dir, `${id}.json`), "{not json", "utf-8"); + expect(() => loadSession("work", id)).toThrow(/malformed/); + }); +}); diff --git a/tests/unit/hooks/roundtable-hook.test.ts b/tests/unit/hooks/roundtable-hook.test.ts new file mode 100644 index 0000000..603c4bd --- /dev/null +++ b/tests/unit/hooks/roundtable-hook.test.ts @@ -0,0 +1,247 @@ +import { describe, it, expect } from "vitest"; +import { + createRoundtableHook, + type RoundtableState, +} from "../../../packages/core/src/hooks/roundtable.js"; +import { HookStateStore } from "../../../packages/core/src/hooks/hook-state.js"; +import type { + HookContext, + AgentResponse, +} from "../../../packages/core/src/hooks/types.js"; +import type { Profile } from "../../../packages/core/src/types.js"; + +function makeProfile(overrides: Partial = {}): Profile { + return { + authType: "api-key", + tool: "claude", + configDir: "/tmp/arc-test", + createdAt: new Date().toISOString(), + enforcement: "log", + ...overrides, + }; +} + +function ctx(overrides: Partial = {}): HookContext { + return { + message: "", + sessionId: "s1", + profile: makeProfile(), + adapter: "claude", + hookMetadata: {}, + ...overrides, + }; +} + +function getState(store: HookStateStore, sessionId: string) { + return store.get(sessionId, "roundtable", "roundtableState"); +} + +describe("roundtable hook — state machine", () => { + it("creates initial state on @roundtable trigger with parsed agents and rounds", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store); + + const result = await hook.check( + ctx({ + adapter: "claude", + message: "@roundtable should we deploy?\nrounds: 3\nagents: claude, codex, gemini", + }), + ); + + expect(result.pass).toBe(true); + 
expect(result.metadata).toMatchObject({ + roundtable: true, + topic: "should we deploy?", + currentRound: 1, + totalRounds: 3, + currentAgent: "claude", + }); + + const state = getState(store, "s1"); + expect(state).toBeDefined(); + expect(state!.agents).toEqual(["claude", "codex", "gemini"]); + expect(state!.rounds).toBe(3); + expect(state!.status).toBe("active"); + expect(state!.modes).toEqual({ + claude: "advocate", + codex: "critic", + gemini: "neutral", + }); + }); + + it("uses defaultRounds when not specified", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store, { defaultRounds: 5 }); + await hook.check( + ctx({ message: "@roundtable topic here\nagents: a, b" }), + ); + expect(getState(store, "s1")!.rounds).toBe(5); + }); + + it("ignores a trigger when a roundtable is already active", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store); + + await hook.check( + ctx({ message: "@roundtable first topic\nagents: claude, codex" }), + ); + const result = await hook.check( + ctx({ message: "@roundtable new topic\nagents: x, y" }), + ); + expect(result.flag).toMatch(/already active/); + expect(result.metadata?.triggerIgnored).toBe(true); + + const state = getState(store, "s1"); + expect(state!.agents).toEqual(["claude", "codex"]); // unchanged + }); + + it("enforce mode blocks out-of-turn messages", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store); + await hook.check( + ctx({ + message: "@roundtable topic\nagents: claude, codex", + profile: makeProfile({ enforcement: "enforce" }), + }), + ); + + // Codex speaks out of turn — claude is expected first. 
+ const result = await hook.check( + ctx({ + adapter: "codex", + message: "hello", + profile: makeProfile({ enforcement: "enforce" }), + }), + ); + expect(result.pass).toBe(false); + expect(result.block).toBe(true); + expect(result.reason).toMatch(/claude/); + }); + + it("log mode records out-of-turn messages but does not block", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store); + await hook.check( + ctx({ message: "@roundtable topic\nagents: claude, codex" }), + ); + + const result = await hook.check(ctx({ adapter: "codex", message: "hi" })); + expect(result.pass).toBe(true); + expect(result.block).toBeFalsy(); + expect(result.metadata?.outOfTurn).toBe(true); + }); + + it("advise mode flags out-of-turn but does not block", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store); + await hook.check( + ctx({ + message: "@roundtable topic\nagents: claude, codex", + profile: makeProfile({ enforcement: "advise" }), + }), + ); + const result = await hook.check( + ctx({ + adapter: "codex", + message: "hi", + profile: makeProfile({ enforcement: "advise" }), + }), + ); + expect(result.pass).toBe(true); + expect(result.flag).toMatch(/claude/); + }); + + it("postProcess advances turns and rounds until synthesizing", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store, { defaultRounds: 2 }); + await hook.check( + ctx({ message: "@roundtable topic\nagents: claude, codex" }), + ); + + const respond = async (agent: string, content: string) => { + await hook.postProcess!( + ctx({ adapter: agent }), + { content } as AgentResponse, + ); + }; + + // Round 1 + await respond("claude", "r1-c"); + expect(getState(store, "s1")!.currentTurnIndex).toBe(1); + expect(getState(store, "s1")!.currentRound).toBe(1); + + await respond("codex", "r1-x"); + expect(getState(store, "s1")!.currentTurnIndex).toBe(0); + expect(getState(store, "s1")!.currentRound).toBe(2); + + // 
Round 2 + await respond("claude", "r2-c"); + await respond("codex", "r2-x"); + + const final = getState(store, "s1")!; + expect(final.status).toBe("synthesizing"); + expect(final.responses).toHaveLength(4); + }); + + it("out-of-turn response in log mode is recorded but does not advance turn", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store); + await hook.check( + ctx({ message: "@roundtable topic\nagents: claude, codex" }), + ); + + await hook.postProcess!( + ctx({ adapter: "codex" }), // out of turn + { content: "jumped in" } as AgentResponse, + ); + + const state = getState(store, "s1")!; + expect(state.currentTurnIndex).toBe(0); // still claude's turn + expect(state.responses).toHaveLength(1); // but recorded + }); + + it("synthesizing → complete on next message through check()", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store, { defaultRounds: 1 }); + await hook.check( + ctx({ message: "@roundtable topic\nagents: claude, codex" }), + ); + await hook.postProcess!( + ctx({ adapter: "claude" }), + { content: "a" } as AgentResponse, + ); + await hook.postProcess!( + ctx({ adapter: "codex" }), + { content: "b" } as AgentResponse, + ); + expect(getState(store, "s1")!.status).toBe("synthesizing"); + + const result = await hook.check( + ctx({ adapter: "claude", message: "synthesis message" }), + ); + expect(result.metadata?.status).toBe("complete"); + expect(getState(store, "s1")!.status).toBe("complete"); + }); + + it("inject() returns live metadata while active, undefined when complete", async () => { + const store = new HookStateStore(); + const hook = createRoundtableHook(store); + await hook.check( + ctx({ message: "@roundtable topic\nagents: claude, codex" }), + ); + + const meta = hook.inject!(ctx()); + expect(meta).toMatchObject({ + roundtable: true, + currentAgent: "claude", + currentRound: 1, + totalRounds: 2, + }); + + // Force complete + const state = getState(store, 
"s1")!; + state.status = "complete"; + store.set("s1", "roundtable", "roundtableState", state); + expect(hook.inject!(ctx())).toBeUndefined(); + }); +}); diff --git a/tests/unit/orchestration/delivery-policy.test.ts b/tests/unit/orchestration/delivery-policy.test.ts new file mode 100644 index 0000000..d0b8600 --- /dev/null +++ b/tests/unit/orchestration/delivery-policy.test.ts @@ -0,0 +1,143 @@ +import { describe, it, expect } from "vitest"; +import { + computeAdaptiveGraceMs, + createDeliveryState, + MessagePriorityQueue, + resolveDeliveryPolicyForTool, + updateReplyLatencyAverage, +} from "../../../packages/core/src/orchestration/delivery-policy.js"; + +describe("orchestration/delivery-policy", () => { + describe("createDeliveryState", () => { + it("returns an empty EMA state", () => { + const s = createDeliveryState(); + expect(s.observedLatencyMs).toBe(0); + expect(s.sampleCount).toBe(0); + }); + }); + + describe("resolveDeliveryPolicyForTool", () => { + it("dispatches to the gemini profile", () => { + const p = resolveDeliveryPolicyForTool("gemini"); + expect(p.minReplyGraceMs).toBe(18_000); + expect(p.adaptiveReplyMultiplier).toBeCloseTo(1.6); + }); + + it("dispatches to the claude profile (case-insensitive)", () => { + const p = resolveDeliveryPolicyForTool("CLAUDE"); + expect(p.minReplyGraceMs).toBe(12_000); + expect(p.adaptiveReplyMultiplier).toBeCloseTo(1.45); + }); + + it("dispatches to the codex profile", () => { + const p = resolveDeliveryPolicyForTool("codex"); + expect(p.minReplyGraceMs).toBe(8_000); + }); + + it("falls back to the default profile for unknown tool", () => { + const p = resolveDeliveryPolicyForTool("mystery-tool"); + expect(p.minReplyGraceMs).toBe(10_000); + }); + + it("falls back to the default profile when tool is undefined", () => { + const p = resolveDeliveryPolicyForTool(undefined); + expect(p.minReplyGraceMs).toBe(10_000); + }); + }); + + describe("updateReplyLatencyAverage", () => { + it("seeds the EMA on the first sample", () 
=> { + const updated = updateReplyLatencyAverage(createDeliveryState(), 2_000); + expect(updated.observedLatencyMs).toBe(2_000); + expect(updated.sampleCount).toBe(1); + }); + + it("decays prior state 70% and folds new sample at 30%", () => { + // observed=1000, new=3000 → 1000*0.7 + 3000*0.3 = 1600 + const next = updateReplyLatencyAverage( + { observedLatencyMs: 1_000, sampleCount: 1 }, + 3_000, + ); + expect(next.observedLatencyMs).toBe(1_600); + expect(next.sampleCount).toBe(2); + }); + + it("ignores invalid samples", () => { + const state = { observedLatencyMs: 1_000, sampleCount: 1 }; + expect(updateReplyLatencyAverage(state, 0)).toEqual(state); + expect(updateReplyLatencyAverage(state, -5)).toEqual(state); + expect(updateReplyLatencyAverage(state, Number.NaN)).toEqual(state); + }); + }); + + describe("computeAdaptiveGraceMs", () => { + const policy = resolveDeliveryPolicyForTool("claude"); + + it("returns 0 when pacing is disabled", () => { + const p = { ...policy, respectAgentPace: false }; + expect(computeAdaptiveGraceMs(p, { observedLatencyMs: 5_000, sampleCount: 1 })).toBe(0); + }); + + it("returns minReplyGrace when no samples exist", () => { + expect(computeAdaptiveGraceMs(policy, createDeliveryState())).toBe( + policy.minReplyGraceMs, + ); + }); + + it("clamps to min when observed*multiplier is small", () => { + const grace = computeAdaptiveGraceMs(policy, { + observedLatencyMs: 100, + sampleCount: 1, + }); + expect(grace).toBe(policy.minReplyGraceMs); + }); + + it("scales linearly in the middle of the range", () => { + const grace = computeAdaptiveGraceMs(policy, { + observedLatencyMs: 20_000, + sampleCount: 3, + }); + // 20_000 * 1.45 = 29_000, within [12_000, 90_000] + expect(grace).toBe(29_000); + }); + + it("clamps to max when observed latency is huge", () => { + const grace = computeAdaptiveGraceMs(policy, { + observedLatencyMs: 10_000_000, + sampleCount: 10, + }); + expect(grace).toBe(policy.maxReplyGraceMs); + }); + }); + + 
describe("MessagePriorityQueue", () => { + it("drains urgent items before normal items", () => { + const q = new MessagePriorityQueue(); + q.enqueue({ text: "n1", priority: "normal", createdAt: 1 }); + q.enqueue({ text: "u1", priority: "urgent", createdAt: 2 }); + q.enqueue({ text: "l1", priority: "low", createdAt: 3 }); + expect(q.dequeue()?.text).toBe("u1"); + expect(q.dequeue()?.text).toBe("n1"); + expect(q.dequeue()?.text).toBe("l1"); + expect(q.dequeue()).toBeUndefined(); + }); + + it("preserves FIFO within a priority level", () => { + const q = new MessagePriorityQueue(); + q.enqueue({ text: "a", priority: "normal", createdAt: 1 }); + q.enqueue({ text: "b", priority: "normal", createdAt: 2 }); + expect(q.dequeue()?.text).toBe("a"); + expect(q.dequeue()?.text).toBe("b"); + }); + + it("exposes size, peek, snapshot, and clear", () => { + const q = new MessagePriorityQueue(); + q.enqueue({ text: "x", priority: "normal", createdAt: 1 }); + expect(q.size).toBe(1); + expect(q.peek()?.text).toBe("x"); + expect(q.snapshot().length).toBe(1); + q.clear(); + expect(q.size).toBe(0); + }); + }); +}); diff --git a/tests/unit/orchestration/roundtable.test.ts b/tests/unit/orchestration/roundtable.test.ts new file mode 100644 index 0000000..11a9c60 --- /dev/null +++ b/tests/unit/orchestration/roundtable.test.ts @@ -0,0 +1,311 @@ +import { describe, it, expect } from "vitest"; +import type { + AgentChunk, + AgentClient, + AgentSendOptions, +} from "../../../packages/core/src/agent-client/types.js"; +import type { Profile } from "../../../packages/core/src/types.js"; +import { + RoundtableOrchestrator, + type RoundtableAgent, + type RoundtableEvent, +} from "../../../packages/core/src/orchestration/roundtable.js"; + +function profile(tool: string, name: string): Profile { + return { + authType: "api-key", + tool, + configDir: `/tmp/arc-test-${name}`, + createdAt: new Date().toISOString(), + }; +} + +/** + * Scripted mock AgentClient. `scripts` maps call index → text to emit. 
+ * Each `send()` yields one text chunk and a `done`. + */ +function makeMockClient(scripts: string[]): AgentClient & { calls: number } { + let calls = 0; + const client = { + async *send( + _prompt: string, + _opts?: AgentSendOptions, + ): AsyncIterable { + const text = scripts[calls] ?? ""; + calls++; + yield { type: "text", content: text }; + yield { type: "done", reason: "end_turn" }; + }, + async shutdown(): Promise {}, + get calls() { + return calls; + }, + } as AgentClient & { calls: number }; + return client; +} + +describe("RoundtableOrchestrator", () => { + it("runs 3 agents × 2 rounds = 6 turns, then synthesis", async () => { + const clientA = makeMockClient([ + "Alice round 1", + "Alice round 2", + ]); + const clientB = makeMockClient([ + "Bob round 1", + "Bob round 2", + ]); + const clientC = makeMockClient([ + "Carol round 1", + "Carol round 2", + JSON.stringify({ + consensus: 0.8, + summary: "We agreed on A.", + keyPoints: ["point1", "point2"], + }), + ]); + + const agents: RoundtableAgent[] = [ + { role: "advocate", profile: profile("claude", "a"), displayName: "alice" }, + { role: "critic", profile: profile("codex", "b"), displayName: "bob" }, + { role: "neutral", profile: profile("claude", "c"), displayName: "carol" }, + ]; + + const clients: Record = { + alice: clientA, + bob: clientB, + carol: clientC, + }; + + const orchestrator = new RoundtableOrchestrator({ + clientFactory: (p) => { + // Pick the matching client by configDir suffix. 
+ if (p.configDir.endsWith("-a")) return clients.alice; + if (p.configDir.endsWith("-b")) return clients.bob; + return clients.carol; + }, + sleep: async () => {}, // skip pacing in tests + }); + + const events: RoundtableEvent[] = []; + const result = await orchestrator.run({ + topic: "Should we refactor?", + agents, + rounds: 2, + synthesizer: agents[2], // carol + onEvent: (evt) => events.push(evt), + }); + + // 6 turn-complete events + const turnCompletes = events.filter((e) => e.type === "turn-complete"); + expect(turnCompletes).toHaveLength(6); + + // Turn order per round + const turnStarts = events.filter((e) => e.type === "turn-start") as Array< + Extract + >; + expect(turnStarts.map((t) => t.agent)).toEqual([ + "alice", + "bob", + "carol", + "alice", + "bob", + "carol", + ]); + + // Round numbers + expect(turnStarts.slice(0, 3).every((t) => t.round === 1)).toBe(true); + expect(turnStarts.slice(3, 6).every((t) => t.round === 2)).toBe(true); + + // Transcript matches + expect(result.transcript).toHaveLength(6); + expect(result.transcript[0].content).toBe("Alice round 1"); + expect(result.transcript[5].content).toBe("Carol round 2"); + + // Synthesis called on carol (her client saw a third call). + expect(clientC.calls).toBe(3); + expect(clientA.calls).toBe(2); + expect(clientB.calls).toBe(2); + + // Consensus parsed correctly. + expect(result.consensusScore).toBe(0.8); + expect(result.synthesis).toBe("We agreed on A."); + expect(result.keyPoints).toEqual(["point1", "point2"]); + + // Phase-change events observed. + const phaseChanges = events.filter((e) => e.type === "phase-change"); + expect(phaseChanges.length).toBeGreaterThanOrEqual(2); + expect(phaseChanges[phaseChanges.length - 1]).toMatchObject({ + status: "complete", + }); + + // Synthesis-complete fires. 
+ expect(events.some((e) => e.type === "synthesis-complete")).toBe(true); + }); + + it("uses first agent as synthesizer by default", async () => { + const clientA = makeMockClient([ + "A1", + "A2", + JSON.stringify({ consensus: 0.4, summary: "ok", keyPoints: [] }), + ]); + const clientB = makeMockClient(["B1", "B2"]); + + const agents: RoundtableAgent[] = [ + { role: "advocate", profile: profile("claude", "a"), displayName: "alice" }, + { role: "critic", profile: profile("codex", "b"), displayName: "bob" }, + ]; + + const orchestrator = new RoundtableOrchestrator({ + clientFactory: (p) => + p.configDir.endsWith("-a") ? clientA : clientB, + sleep: async () => {}, + }); + + const result = await orchestrator.run({ + topic: "x", + agents, + rounds: 2, + // no synthesizer → defaults to first agent (alice) + }); + + expect(clientA.calls).toBe(3); // 2 rounds + synthesis + expect(clientB.calls).toBe(2); + expect(result.consensusScore).toBe(0.4); + }); + + it("returns consensus=0.5 and raw text when synthesizer returns non-JSON", async () => { + const clientA = makeMockClient([ + "A1", + "A2", + "Sorry, I can't produce JSON today.", + ]); + const clientB = makeMockClient(["B1", "B2"]); + + const agents: RoundtableAgent[] = [ + { role: "advocate", profile: profile("claude", "a"), displayName: "alice" }, + { role: "critic", profile: profile("codex", "b"), displayName: "bob" }, + ]; + + const orchestrator = new RoundtableOrchestrator({ + clientFactory: (p) => + p.configDir.endsWith("-a") ? 
clientA : clientB, + sleep: async () => {}, + }); + + const result = await orchestrator.run({ + topic: "x", + agents, + rounds: 2, + }); + + expect(result.consensusScore).toBe(0.5); + expect(result.synthesis).toBe("Sorry, I can't produce JSON today."); + expect(result.keyPoints).toEqual([]); + }); + + it("adaptive pacing is observable via the injected sleep", async () => { + const clientA = makeMockClient([ + "A1", + "A2", + JSON.stringify({ consensus: 1, summary: "done", keyPoints: [] }), + ]); + const clientB = makeMockClient(["B1", "B2"]); + + const sleeps: number[] = []; + const orchestrator = new RoundtableOrchestrator({ + clientFactory: (p) => + p.configDir.endsWith("-a") ? clientA : clientB, + sleep: async (ms) => { + sleeps.push(ms); + }, + }); + + await orchestrator.run({ + topic: "x", + agents: [ + { role: "advocate", profile: profile("claude", "a"), displayName: "alice" }, + { role: "critic", profile: profile("codex", "b"), displayName: "bob" }, + ], + rounds: 2, + }); + + // 4 turns total; sleep only fires *between* turns (3 times). + expect(sleeps.length).toBe(3); + // Each sleep should be at least the min reply grace of one policy (8_000 + // for codex, 12_000 for claude) — no zero sleeps. 
+ expect(sleeps.every((ms) => ms >= 8_000)).toBe(true); + }); + + it("throws when fewer than 2 agents are provided", async () => { + const orchestrator = new RoundtableOrchestrator({ + clientFactory: () => makeMockClient([]), + sleep: async () => {}, + }); + await expect( + orchestrator.run({ + topic: "x", + agents: [ + { + role: "advocate", + profile: profile("claude", "only"), + displayName: "solo", + }, + ], + rounds: 1, + }), + ).rejects.toThrow(/at least 2 agents/); + }); + + it("throws when a virtual agent has no profile", async () => { + const orchestrator = new RoundtableOrchestrator({ + clientFactory: () => makeMockClient([]), + sleep: async () => {}, + }); + await expect( + orchestrator.run({ + topic: "x", + agents: [ + { role: "advocate", profile: profile("claude", "a"), displayName: "alice" }, + { role: "critic", virtualRole: "critic", displayName: "virt" }, + ], + rounds: 1, + }), + ).rejects.toThrow(/Virtual agents not yet supported/); + }); + + it("forces launchMode=worker on agent profiles", async () => { + const seen: Profile[] = []; + const clientA = makeMockClient([ + "A1", + "A2", + JSON.stringify({ consensus: 0.7, summary: "ok", keyPoints: [] }), + ]); + const clientB = makeMockClient(["B1", "B2"]); + const orchestrator = new RoundtableOrchestrator({ + clientFactory: (p) => { + seen.push(p); + return p.configDir.endsWith("-a") ? 
clientA : clientB; + }, + sleep: async () => {}, + }); + + await orchestrator.run({ + topic: "x", + agents: [ + { + role: "advocate", + profile: { ...profile("claude", "a"), launchMode: "native" }, + displayName: "alice", + }, + { + role: "critic", + profile: { ...profile("codex", "b"), launchMode: "native" }, + displayName: "bob", + }, + ], + rounds: 2, + }); + + expect(seen.every((p) => p.launchMode === "worker")).toBe(true); + }); +}); diff --git a/tests/unit/orchestration/staged-workflow.test.ts b/tests/unit/orchestration/staged-workflow.test.ts new file mode 100644 index 0000000..3fbc785 --- /dev/null +++ b/tests/unit/orchestration/staged-workflow.test.ts @@ -0,0 +1,110 @@ +import { describe, it, expect } from "vitest"; +import { + DEFAULT_COMPLETION_PATTERNS, + InMemoryMessageBus, + StagedWorkflowManager, + type StagedMessage, + type StagedPhase, +} from "../../../packages/core/src/orchestration/staged-workflow.js"; + +function msg(from: string, content: string, createdAt = 0): StagedMessage { + return { id: `${from}-${createdAt}-${Math.random()}`, from, content, createdAt }; +} + +describe("DEFAULT_COMPLETION_PATTERNS", () => { + it("matches PLAN_SHARED keywords", () => { + const p = DEFAULT_COMPLETION_PATTERNS.plan; + expect(p.some((r) => r.test("plan shared with team"))).toBe(true); + expect(p.some((r) => r.test("here is my approach"))).toBe(true); + expect(p.some((r) => r.test("ready"))).toBe(true); + }); + + it("matches EXEC_DONE keywords", () => { + const p = DEFAULT_COMPLETION_PATTERNS.exec; + expect(p.some((r) => r.test("implementation done"))).toBe(true); + expect(p.some((r) => r.test("finished the refactor"))).toBe(true); + expect(p.some((r) => r.test("implemented"))).toBe(true); + }); + + it("matches VERIFY_OK keywords", () => { + const p = DEFAULT_COMPLETION_PATTERNS.verify; + expect(p.some((r) => r.test("verify_ok"))).toBe(true); + expect(p.some((r) => r.test("review complete"))).toBe(true); + expect(p.some((r) => r.test("approved"))).toBe(true); 
+ }); +}); + +describe("StagedWorkflowManager", () => { + it("progresses through all phases when every agent matches", async () => { + const bus = new InMemoryMessageBus(); + const phases: StagedPhase[] = ["plan", "exec", "verify"]; + const agents = ["alice", "bob"]; + + // Pre-populate every phase's completion messages. Because resetCursor + // rewinds the cursor on phase entry, the manager re-reads the full bus + // each phase — messages that match all three phase patterns will let + // the workflow advance straight through. + for (const p of phases) { + await bus.post(msg("alice", `${p} ready — plan shared done approved`)); + await bus.post(msg("bob", `${p} ready — plan shared done approved`)); + } + + const observed: Array<string> = []; + const manager = new StagedWorkflowManager( + { + phases, + phaseTimeoutMs: { plan: 10_000, exec: 10_000, verify: 10_000 }, + pollIntervalMs: 1, + onPhaseChange: (p) => observed.push(p), + }, + { messageBus: bus, allAgents: agents }, + ); + + const result = await manager.run(); + expect(result.phase).toBe("complete"); + expect(result.phasesCompleted).toEqual(phases); + expect(result.phasesTimedOut).toEqual([]); + expect(observed).toContain("plan"); + expect(observed).toContain("exec"); + expect(observed).toContain("verify"); + expect(observed).toContain("complete"); + }); + + it("records a timeout when agents never match the completion pattern", async () => { + const bus = new InMemoryMessageBus(); + const manager = new StagedWorkflowManager( + { + phases: ["plan"], + phaseTimeoutMs: { plan: 20, exec: 20, verify: 20 }, + pollIntervalMs: 1, + }, + { messageBus: bus, allAgents: ["alice", "bob"] }, + ); + + // Post a non-matching message. 
+ await bus.post(msg("alice", "still thinking")); + + const result = await manager.run(); + expect(result.phasesCompleted).toEqual([]); + expect(result.phasesTimedOut).toEqual(["plan"]); + expect(result.phase).toBe("complete"); // terminal regardless of timeout + }); + + it("holds the phase when only some agents match", async () => { + const bus = new InMemoryMessageBus(); + const manager = new StagedWorkflowManager( + { + phases: ["plan"], + phaseTimeoutMs: { plan: 30, exec: 30, verify: 30 }, + pollIntervalMs: 1, + }, + { messageBus: bus, allAgents: ["alice", "bob"] }, + ); + + await bus.post(msg("alice", "plan shared ready")); + // Never post bob's completion. + + const result = await manager.run(); + expect(result.phasesTimedOut).toEqual(["plan"]); + }); +}); diff --git a/tests/unit/orchestration/watchdog.test.ts b/tests/unit/orchestration/watchdog.test.ts new file mode 100644 index 0000000..0bd7743 --- /dev/null +++ b/tests/unit/orchestration/watchdog.test.ts @@ -0,0 +1,151 @@ +import { describe, it, expect } from "vitest"; +import { + AgentWatchdog, + isLowSignalWatchdogReply, + WATCHDOG_STALL_OPTIONS, + type WatchdogEvent, +} from "../../../packages/core/src/orchestration/watchdog.js"; + +describe("isLowSignalWatchdogReply", () => { + it("flags short status filler", () => { + expect(isLowSignalWatchdogReply("still working")).toBe(true); + expect(isLowSignalWatchdogReply("looking into it")).toBe(true); + expect(isLowSignalWatchdogReply("status update:")).toBe(true); + }); + + it("flags no-progress status indicators", () => { + expect( + isLowSignalWatchdogReply( + "Current status: conversation remains idle; awaiting a concrete task.", + ), + ).toBe(true); + }); + + it("treats concrete progress updates as high-signal", () => { + expect( + isLowSignalWatchdogReply( + "Implemented the orchestrator and wired up the delivery policy; all tests pass.", + ), + ).toBe(false); + }); + + it("flags empty strings", () => { + expect(isLowSignalWatchdogReply("")).toBe(true); 
+ expect(isLowSignalWatchdogReply(" ")).toBe(true); + }); +}); + +describe("AgentWatchdog", () => { + function makeClock(startMs: number) { + let t = startMs; + return { + now: () => t, + advance: (delta: number) => { + t += delta; + }, + }; + } + + it("emits a nudge after the nudge threshold", () => { + const clock = makeClock(1_000_000); + const nudges: Array<{ agentId: string; text: string }> = []; + const events: WatchdogEvent[] = []; + + const dog = new AgentWatchdog({ + now: clock.now, + nudgeAfterMs: 3_000, + stallAfterMs: 5_000, + isAgentActive: () => true, + getLastMessageAt: () => 1_000_000, // agent never spoke after start + postDecision: () => {}, + postNudge: (id, text) => nudges.push({ agentId: id, text }), + onEvent: (evt) => events.push(evt), + }); + + dog.track("alice"); + // below threshold + clock.advance(2_500); + expect(dog.tick()).toEqual([]); + expect(nudges).toHaveLength(0); + + // past threshold + clock.advance(1_000); + const fired = dog.tick(); + expect(fired).toHaveLength(1); + expect(fired[0].type).toBe("nudge"); + expect(fired[0].agentId).toBe("alice"); + expect(nudges).toHaveLength(1); + expect(events.some((e) => e.type === "nudge")).toBe(true); + }); + + it("emits stall + decision after the stall threshold past nudge", () => { + const clock = makeClock(0); + const decisions: Array<{ agentId: string; options: readonly string[] }> = []; + + const dog = new AgentWatchdog({ + now: clock.now, + nudgeAfterMs: 1_000, + stallAfterMs: 2_000, + isAgentActive: () => true, + getLastMessageAt: () => 0, + postDecision: (id, options) => decisions.push({ agentId: id, options }), + }); + + dog.track("bob"); + + // First tick past nudge → nudge + clock.advance(1_500); + const firstEvents = dog.tick(); + expect(firstEvents.some((e) => e.type === "nudge")).toBe(true); + + // Not yet past stall + clock.advance(1_000); + const midEvents = dog.tick(); + expect(midEvents).toEqual([]); + + // Past stall + clock.advance(1_500); + const stallEvents = 
dog.tick(); + expect(stallEvents.some((e) => e.type === "stall")).toBe(true); + expect(stallEvents.some((e) => e.type === "decision")).toBe(true); + expect(decisions).toHaveLength(1); + expect(decisions[0].options).toEqual(WATCHDOG_STALL_OPTIONS); + }); + + it("reset() clears nudge marker so nudges fire again", () => { + const clock = makeClock(0); + const dog = new AgentWatchdog({ + now: clock.now, + nudgeAfterMs: 1_000, + stallAfterMs: 2_000, + isAgentActive: () => true, + getLastMessageAt: () => 0, + postDecision: () => {}, + }); + dog.track("carol"); + clock.advance(2_000); + dog.tick(); // nudge + + dog.reset("carol"); + expect(dog.snapshot().carol).toEqual({}); + + clock.advance(2_000); + const events = dog.tick(); + expect(events.some((e) => e.type === "nudge")).toBe(true); + }); + + it("skips agents flagged inactive by the host", () => { + const clock = makeClock(0); + const dog = new AgentWatchdog({ + now: clock.now, + nudgeAfterMs: 1_000, + stallAfterMs: 2_000, + isAgentActive: () => false, + getLastMessageAt: () => 0, + postDecision: () => {}, + }); + dog.track("ghost"); + clock.advance(10_000); + expect(dog.tick()).toEqual([]); + }); +}); From b3c7538d83b334904b4f8cafeea1b06960a1bf2c Mon Sep 17 00:00:00 2001 From: Bailey Dixon Date: Sat, 18 Apr 2026 10:40:09 -0400 Subject: [PATCH 10/17] feat: roundtable CLI + dashboard chat + docs + 0.4.0 (Wave 2: Phases 6+7+9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 6 — arc roundtable CLI + MCP team contract - arc roundtable --agents a,b,c --rounds 2 --synthesizer --roles --format plain|json: streaming colored transcript + synthesis + consensus score - MCP: arc_chat (one-shot chat on active profile, read-only by default) - MCP: arc_roundtable (headless roundtable, returns transcript + synthesis + consensusScore + keyPoints + roundtableId + durationMs) - MCP team_say / team_read / team_status / team_done / team_plan / team_ask — shared in-memory TeamSessionStore 
(process-wide; per- team isolation requires separate MCP server processes; documented) - Existing 5 supervision MCP tools unchanged; integration tests relaxed from exact-tool-count to arrayContaining Phase 7 — Dashboard chat view - packages/dashboard/src/ws.ts: session routing added on top of legacy broadcast. Clients send { type: "hello", sessionId } on connect; ws.broadcastTo(sessionId, event, data) targets one client. broadcast() preserved for fan-out (roundtable view). - 5 new routes: POST /api/chat/message (streams chat-chunk events over WS), POST /api/chat/confirm (answer chat-confirm-needed events; 60s auto-deny timeout), GET/DELETE /api/chat/sessions[/id] - Chat core loaded via dynamic import(@axiom-labs/arc-core) with 503 fallback if exports are missing — defensive, safe now that Phase 4/5 exports are in core - public/components/chat.js: session list sidebar, streaming message thread, expandable tool-call cards, confirmation modal, bearer-token bootstrap - ~240 lines of chat-view CSS using existing tokens - vitest.config.ts + tsconfig.json: include packages/*/tests/** - 13 new dashboard tests (ws-session + api-chat) Phase 9 — Docs + 0.4.0 release - New user docs: chat.md, roundtable.md, multi-agent-pipelines.md - Extended architecture page; updated VitePress sidebar - FEATURES.md: Phases 2-7 checked off; Phase 8 remains [ ] - CLAUDE.md: orchestration-layer bullet extended with arc chat + RoundtableOrchestrator + StagedWorkflowManager + AgentWatchdog - README quickstart: chat path added alongside fast/full paths - CHANGELOG.md: 0.4.0 entry - Version bump 0.3.0 -> 0.4.0 via scripts/version.js (syncs packages/cli/src/version.ts, root package.json, site/package.json) Build 573 KB; typecheck clean; 1263/1263 tests pass; web:build succeeds in 10.83s. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .bg-shell/manifest.json | Bin 0 -> 2 bytes CHANGELOG.md | 40 +- CLAUDE.md | 10 +- FEATURES.md | 18 +- README.md | 22 + docs/plans/ai-and-roundtable.md | 6 +- package.json | 2 +- packages/cli/src/cli.ts | 43 ++ packages/cli/src/commands/roundtable.ts | 299 ++++++++++ packages/cli/src/version.ts | 2 +- packages/dashboard/public/components/chat.js | 525 ++++++++++++++++++ packages/dashboard/public/index.html | 2 + .../dashboard/public/styles/components.css | 282 ++++++++++ packages/dashboard/src/api.ts | 503 +++++++++++++++++ packages/dashboard/src/server.ts | 7 + packages/dashboard/src/types.ts | 7 + packages/dashboard/src/ws.ts | 76 ++- packages/dashboard/tests/api-chat.test.ts | 411 ++++++++++++++ packages/dashboard/tests/ws-session.test.ts | 255 +++++++++ packages/mcp/src/server.ts | 8 + packages/mcp/src/tools/chat.ts | 201 +++++++ packages/mcp/src/tools/roundtable.ts | 146 +++++ packages/mcp/src/tools/team/ask.ts | 45 ++ packages/mcp/src/tools/team/done.ts | 53 ++ packages/mcp/src/tools/team/index.ts | 35 ++ packages/mcp/src/tools/team/plan.ts | 40 ++ packages/mcp/src/tools/team/read.ts | 47 ++ packages/mcp/src/tools/team/say.ts | 47 ++ packages/mcp/src/tools/team/shared-bus.ts | 111 ++++ packages/mcp/src/tools/team/status.ts | 25 + site/package.json | 2 +- site/src/components/Features.tsx | 4 +- tests/integration/mcp-arc-chat-tool.test.ts | 172 ++++++ .../mcp-arc-roundtable-tool.test.ts | 192 +++++++ tests/integration/mcp-host.test.ts | 6 +- tests/integration/mcp-http-server.test.ts | 10 +- tests/integration/mcp-server.test.ts | 6 +- tests/integration/mcp-team-tools.test.ts | 144 +++++ tests/integration/roundtable-cli.test.ts | 338 +++++++++++ tsconfig.json | 3 +- user-docs/.vitepress/config.mts | 8 + user-docs/architecture/index.md | 25 +- user-docs/guide/chat.md | 104 ++++ user-docs/guide/multi-agent-pipelines.md | 100 ++++ user-docs/guide/roundtable.md | 115 ++++ vitest.config.ts | 5 +- 46 files changed, 4458 
insertions(+), 44 deletions(-) create mode 100644 .bg-shell/manifest.json create mode 100644 packages/cli/src/commands/roundtable.ts create mode 100644 packages/dashboard/public/components/chat.js create mode 100644 packages/dashboard/tests/api-chat.test.ts create mode 100644 packages/dashboard/tests/ws-session.test.ts create mode 100644 packages/mcp/src/tools/chat.ts create mode 100644 packages/mcp/src/tools/roundtable.ts create mode 100644 packages/mcp/src/tools/team/ask.ts create mode 100644 packages/mcp/src/tools/team/done.ts create mode 100644 packages/mcp/src/tools/team/index.ts create mode 100644 packages/mcp/src/tools/team/plan.ts create mode 100644 packages/mcp/src/tools/team/read.ts create mode 100644 packages/mcp/src/tools/team/say.ts create mode 100644 packages/mcp/src/tools/team/shared-bus.ts create mode 100644 packages/mcp/src/tools/team/status.ts create mode 100644 tests/integration/mcp-arc-chat-tool.test.ts create mode 100644 tests/integration/mcp-arc-roundtable-tool.test.ts create mode 100644 tests/integration/mcp-team-tools.test.ts create mode 100644 tests/integration/roundtable-cli.test.ts create mode 100644 user-docs/guide/chat.md create mode 100644 user-docs/guide/multi-agent-pipelines.md create mode 100644 user-docs/guide/roundtable.md diff --git a/.bg-shell/manifest.json b/.bg-shell/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..09f370e38f498a462e1ca0faa724559b6630c04f GIT binary patch literal 2 JcmZQz0000200961 literal 0 HcmV?d00001 diff --git a/CHANGELOG.md b/CHANGELOG.md index 145526d..4b44418 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,43 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/), and this ## [Unreleased] +## [0.4.0] - 2026-04-18 + +### Added + +- **`arc chat` CLI** — interactive REPL with streaming responses, tool use, and permission modes (`read-only` / `supervised` / `autonomous`). 
REPL slash commands: `/exit`, `/save`, `/new`, `/mode`, `/clear`, `/sessions`, `/resume`, `/help`. One-shot mode via `--once "<prompt>"`. +- **`ChatSession` per-profile persistence** — sessions stored at `~/.arc/profiles/<name>/chat-sessions/<id>.json` with atomic writes; `--session <id>` / `/resume <id>` to pick up where you left off. +- **Roundtable orchestrator** — `RoundtableOrchestrator` class drives the existing roundtable hook over multiple profiles with adaptive delivery pacing (EMA latency) and a designated synthesizer returning a consensus score. Ported from Agent-Forge. +- **Staged workflow state machine** — `StagedWorkflowManager` implements PLAN → EXEC → VERIFY with per-phase completion patterns and timeouts. +- **Agent stall watchdog** — `AgentWatchdog` nudges agents at 3 min, marks them stalled at 5 min, and runs a decision protocol (ported from Agent-Forge). +- **Agent client abstraction** — `packages/core/src/agent-client/` one-shot CLI invocation for Claude / Codex / Gemini with MCP config injection per `mcpMode` variant and per-tool stream parsers. +- **Tool registry + agent loop** — `packages/core/src/agent/` with three permission modes and ~16 ARC tools (11 read, 4 write, 1 dangerous) wired to existing handlers. +- **Knowledge endowment** — `packages/core/src/knowledge/` with static ARC catalog (architecture + 52-entry command reference + 16-term glossary), 33-entry feature index, and `buildSystemPrompt()` runtime composer under 4K tokens (~1.3K typical). +- **Launch modes** — `launchMode: "native" | "worker"` field on Profile (default `native`). `--native` / `--worker` CLI overrides. TUI `m` key in ProfilesView toggles. Doctor check for deprecated `CLAUDE_CODE_NO_FLICKER` env var. +- **Bare launch** — `arc run <tool>` and `arc launch --bare <tool>` skip the ARC overlay entirely (no env injection, no hook pipeline). Tool-name inference falls through to bare when no matching profile exists. 
+- **Clearable active profile** — `arc profile switch none` and `arc profile clear-active` set `activeProfile` to `null`. Rendered as `(none)` in CLI and TUI. +- **Agent instructions** — `instructions` / `instructionsFile` fields on Profile, injected as `ARC_AGENT_INSTRUCTIONS` env var at launch. `arc instructions` CLI: `show` / `set` / `edit` / `clear`. +- **OpenAI-compatible providers** — `openai-compat` auth type + `ProviderConfig` (`baseUrl`, `model`, `apiKeyEnvVar`, `displayName`) on Profile; 7 presets (OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, DeepSeek). `arc provider` CLI: `set` / `show` / `clear` / `presets`. +- **Backup / export / import** — `arc backup create/restore/list` for a gzipped `~/.arc/` archive (credentials excluded by default); `arc profile export` / `arc profile import-file` for single-profile transport with inlined instructions. +- **Profile cloning** — `cloneProfile()` core function + `arc profile clone [--no-copy-dir]` CLI + `Shift+C` inline clone in ProfilesView. +- **Launch history** — `~/.arc/history.json` records each launch (profile, tool, timestamp, outcome, exitCode); DashView shows recent launches + activity log entries. +- **Toast notifications** — `ToastProvider` + `useToast()` with auto-dismiss (2.5 s); mounted globally in the Dashboard. +- **Interactive sidebar queue** — Enter on a profile row in the Sidebar quick-launches without switching views. + +### Documentation + +- `user-docs/guide/chat.md` — Chat Guide (quickstart, permission modes, REPL commands, session persistence, known limitations). +- `user-docs/guide/roundtable.md` — Running Roundtables (concepts, programmatic API, adaptive pacing, worker-mode requirement). +- `user-docs/guide/multi-agent-pipelines.md` — PLAN → EXEC → VERIFY state machine with completion patterns and timeouts. +- `user-docs/architecture/index.md` — extended "Agent Client + Chat + Orchestration" section. 
+ +### Coming in 0.4.x / 0.5.x + +- `arc roundtable` CLI with streaming transcript and per-agent color coding (Phase 6). +- MCP tools: `arc_chat`, `arc_roundtable`, and the 6-tool `team_*` contract (Phase 6). +- Dashboard chat view with per-session WebSocket streaming and tool-call visualization (Phase 7). +- Dashboard roundtable + pipelines views (Phase 8). + ## [0.2.0] - 2026-04-03 All 25 phases of the [v2.0 spec](./docs/spec/SPEC.md) are now implemented. ARC has evolved from a profile manager into a unified agent runtime control plane, absorbing the [Axiom-Supervisor](https://github.com/Codename-11/axiom-supervisor) project. @@ -186,6 +223,7 @@ All 25 phases of the [v2.0 spec](./docs/spec/SPEC.md) are now implemented. ARC h - **Light mode contrast** — WCAG AA compliant dimmed/border colors, explicit `colors.text` on import hint - **React hooks violation** — `useScreenSize()` moved above conditional returns in DashView -[Unreleased]: https://github.com/Codename-11/ARC/compare/v0.2.0...HEAD +[Unreleased]: https://github.com/Codename-11/ARC/compare/v0.4.0...HEAD +[0.4.0]: https://github.com/Codename-11/ARC/compare/v0.2.0...v0.4.0 [0.2.0]: https://github.com/Codename-11/ARC/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/Codename-11/ARC/releases/tag/v0.1.0 diff --git a/CLAUDE.md b/CLAUDE.md index cfa120e..22ffd14 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,15 +29,17 @@ ARC (Agent Runtime Control) is a CLI + TUI for managing multiple agent profiles - **Landing site:** `site/` — React 19 + Vite + Tailwind v4, Nothing-design marketing page - **Deployment:** Root `Dockerfile` + `nginx.conf` — multi-stage build merging `site/` at `/` and `user-docs/` at `/docs/` into single nginx container - **Web Dashboard:** 13 view components (Overview, Sessions, Traces, Risk, Tasks, Skills, Memory, Agents, Factory + Profiles, Diagnostics, Sync, Plugins) -- **Orchestration layer:** Hook pipeline (8 hooks in priority order), roundtable multi-agent discussions, task delegation 
protocol, interagent routing, source classification +- **Orchestration layer:** Hook pipeline (8 hooks in priority order), roundtable multi-agent discussions, task delegation protocol, interagent routing, source classification, `arc chat` REPL, `RoundtableOrchestrator`, `StagedWorkflowManager` (PLAN/EXEC/VERIFY), `AgentWatchdog` (stall detection) - **Adapters:** Claude Code (SDK + plugin + hooks), Codex CLI, Gemini CLI, OpenClaw (native plugin), Hermes Agent (MCP bridge), OpenAI Compatible (custom providers), Generic (fallback for any tool) - **Agent instructions:** `instructions` / `instructionsFile` fields on Profile; resolved at launch, injected as `ARC_AGENT_INSTRUCTIONS` env var; `arc instructions` CLI for show/set/edit/clear - **Custom providers:** `openai-compat` auth type + `ProviderConfig` (baseUrl, model, apiKeyEnvVar) on Profile; 7 presets (OpenRouter, Ollama, LM Studio, Together, Groq, MiniMax, DeepSeek); `arc provider` CLI for set/show/clear/presets - **Launch modes:** `launchMode: "native" | "worker"` on Profile (default `native`). Native uses full TTY handoff so the tool paints its own TUI; worker uses `spawnManagedProcess` for ARC-supervised orchestration. CLI flags `--native` / `--worker` override. TUI: `m` in ProfilesView toggles. Roundtable forces worker regardless. - **Bare launch:** `arc run ` and `arc launch --bare ` skip ARC overlay entirely (no env injection, no hook pipeline). Tool-name inference falls through to bare when no matching profile exists. `activeProfile` may be `null` — cleared via `arc profile switch none` or `arc profile clear-active`, rendered as `(none)`. -- **Agent client (internal):** `packages/core/src/agent-client/` — CLI-spawn clients for Claude/Codex/Gemini with MCP config injection per `mcpMode` variant and per-tool stream parsers. Foundation for upcoming `arc chat` + roundtable orchestrator. See `docs/plans/ai-and-roundtable.md`. 
-- **Agent loop + tool registry (internal):** `packages/core/src/agent/` — tool registry with read-only/supervised/autonomous permission modes, agent loop for tool-use dispatch. -- **Knowledge (internal):** `packages/core/src/knowledge/` — static + runtime system prompt composition (ARC architecture, command reference, live state). +- **Agent client:** `packages/core/src/agent-client/` — CLI-spawn clients for Claude/Codex/Gemini with MCP config injection per `mcpMode` variant and per-tool stream parsers. Substrate for `arc chat` + `RoundtableOrchestrator`. See `docs/plans/ai-and-roundtable.md`. +- **Agent loop + tool registry:** `packages/core/src/agent/` — tool registry with read-only/supervised/autonomous permission modes, `runAgent` generator for tool-use dispatch, ~16 ARC tools wired to existing handlers (list_profiles, clone_profile, switch_active_profile, query_logs, etc.). +- **Knowledge:** `packages/core/src/knowledge/` — static ARC catalog (architecture + 52-entry command reference + 16-term glossary) + 33-entry feature index + `buildSystemPrompt()` runtime composer under 4K tokens. +- **Chat:** `packages/core/src/chat/` — `ChatSession` primitive + per-profile store at `~/.arc/profiles//chat-sessions/` (atomic writes, resume support). Consumed by `arc chat` CLI (`packages/cli/src/commands/chat.ts`). +- **Orchestration:** `packages/core/src/orchestration/` — `RoundtableOrchestrator` (driver over the roundtable hook with adaptive pacing + synthesizer), `StagedWorkflowManager` (PLAN → EXEC → VERIFY with completion patterns + per-phase timeouts), `AgentWatchdog` (3-min nudge / 5-min stall), `AgentDeliveryPolicy` + EMA latency tracking (ported from Agent-Forge). ## Key Conventions diff --git a/FEATURES.md b/FEATURES.md index 6d8e85c..6c41345 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -34,14 +34,16 @@ Tracking file for planned features, enhancements, and ideas. 
Checked items are s - [ ] **Team/shared config** — repo-checked config with local secret overlays - [x] **Backup/export/import** — `arc backup create/restore/list` (gzipped archive of `~/.arc/`, credentials excluded by default) + `arc profile export` / `arc profile import-file` (single-profile JSON transport with inlined instructions) - [x] **Managed updates** — self-update system with npm registry check and TUI update banner -- [x] **Agent client foundation** — internal CLI-spawn agent client at `packages/core/src/agent-client/` (Claude/Codex/Gemini), MCP config injection per `mcpMode`, stream parsers. Building block for upcoming chat + roundtable orchestrator (see `docs/plans/ai-and-roundtable.md` Phase 1) -- [ ] **Tool registry + agent loop** — `packages/core/src/agent/` tool registry with read-only/supervised/autonomous permission modes; ARC tool set wired to existing handlers (list_profiles, clone_profile, configure_provider, etc.). Plan Phase 2 -- [ ] **Knowledge endowment** — `packages/core/src/knowledge/` system prompt composition (static architecture + command reference + runtime state). Plan Phase 3 -- [ ] **`arc chat` CLI** — terminal REPL using active profile's agent client, streaming output, permission-gated tool calls. Plan Phase 4 -- [ ] **Roundtable orchestrator** — driver loop over the existing roundtable hook; adaptive pacing ported from Agent-Forge. Plan Phase 5 -- [ ] **`arc roundtable` CLI + team MCP tools** — `arc roundtable --agents a,b,c` with streaming transcript; `arc_chat` / `arc_roundtable` / `team_*` MCP tools. Plan Phase 6 -- [ ] **Dashboard chat view** — per-session WS streaming, tool-call visualization, permission-mode toggle, confirmation modal. Plan Phase 7 -- [ ] **Dashboard roundtable + pipelines view** — configure + run multi-agent flows from the browser with live transcript. 
Plan Phase 8 +- [x] **Agent client foundation** — internal CLI-spawn agent client at `packages/core/src/agent-client/` (Claude/Codex/Gemini), MCP config injection per `mcpMode`, stream parsers. Plan Phase 1 +- [x] **Tool registry + agent loop** — `packages/core/src/agent/` with ~16 ARC tools spanning read / write / dangerous tiers; three permission modes (read-only / supervised / autonomous); `runAgent` generator. Plan Phase 2 +- [x] **Knowledge endowment** — `packages/core/src/knowledge/` system prompt composition (ARC architecture + 52-entry command catalog + 33-entry feature index + 16-term glossary + runtime state). Plan Phase 3 +- [x] **`arc chat` CLI** — terminal REPL using active profile's agent client with streaming output, permission-gated tool calls, per-profile session persistence at `~/.arc/profiles//chat-sessions/`, REPL slash commands. Plan Phase 4 (0.4.0) +- [x] **Roundtable orchestrator** — `RoundtableOrchestrator` driving the existing roundtable hook with adaptive pacing (EMA latency) and synthesizer-driven consensus score. Plan Phase 5 (0.4.0) +- [x] **Staged workflow state machine** — `StagedWorkflowManager` PLAN → EXEC → VERIFY with completion patterns and per-phase timeouts (ported from Agent-Forge) +- [x] **Agent stall watchdog** — nudge at 3 min, mark stalled at 5 min, decision protocol (ported from Agent-Forge) +- [x] **`arc roundtable` CLI + team MCP tools** — `arc roundtable --agents a,b,c` with streaming transcript; `arc_chat` / `arc_roundtable` / 6 `team_*` MCP tools. Plan Phase 6 (0.4.0) +- [x] **Dashboard chat view** — per-session WS streaming, tool-call visualization, permission-mode toggle, confirmation modal. Plan Phase 7 (0.4.0) +- [ ] **Dashboard roundtable + pipelines view** — configure + run multi-agent flows from the browser with live transcript. 
Plan Phase 8 (ships in 0.4.x) ## Priority 4 — Observability & Polish diff --git a/README.md b/README.md index 40c944e..b875c7d 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ One binary. One config directory (`~/.arc/`). Every agent runtime — Claude Cod | Layer | Capabilities | |-------|-------------| +| **Chat** | Chat interactively with your profile's model, with tool use and permission gates — `arc chat` | | **Identity** | Named profiles, credentials, auth (OAuth/API key/Bedrock/Vertex/Foundry), OS keyring, env isolation | | **Launch** | Tool detection, shell shims, per-profile flags, workspace-aware auto-selection (`arc.json`) | | **Adapters** | Claude Code (SDK bridge + hooks + plugin), Codex CLI, Gemini CLI, OpenClaw, Generic (MCP/HTTP) | @@ -161,6 +162,16 @@ arc use personal arc status ``` +**Chat with your profile:** + +```bash +arc chat # REPL over the active profile, with tool use +arc chat --once "list my profiles" # one-shot, exit when done +arc chat --mode read-only # safe mode — no write-tool calls +``` + +The model is your profile's CLI tool (Claude, Codex, or Gemini). ARC composes a system prompt from its knowledge layer, streams the response back, and dispatches any ARC tool calls the model makes. See the [Chat Guide](https://arc-cli.dev/docs/guide/chat). 
+ ## Screenshots | | | @@ -199,6 +210,17 @@ arc launch --bare # Same as `arc run` arc launch [name] -- --model opus # Pass flags through to the tool ``` +### Chat + +```bash +arc chat # Interactive REPL over the active profile +arc chat --once "" # One-shot, exit when done +arc chat --profile # Override the active profile +arc chat --mode read-only # Forbid any write tools +arc chat --session # Resume a prior session +arc chat --new # Start fresh +``` + ### Dashboard ```bash diff --git a/docs/plans/ai-and-roundtable.md b/docs/plans/ai-and-roundtable.md index 39b0e84..39a66ca 100644 --- a/docs/plans/ai-and-roundtable.md +++ b/docs/plans/ai-and-roundtable.md @@ -409,13 +409,13 @@ Update checkboxes in-place as phases complete. Add a `Completed YYYY-MM-DD` mark - [x] **Completed 2026-04-18** — commit `443a78c`. `ARC_KNOWLEDGE` (architecture + 52-entry command catalog + 16-term glossary), `FEATURES_INDEX` (33 entries), `buildSystemPrompt()` composes 6 sections under 4K tokens (~1284 typical). 27 tests. ### Phase 4 — CLI `arc chat` -- [ ] Not started +- [x] **Completed 2026-04-18** — commit `a14bedc`. `arc chat` REPL with streaming output, `--profile` / `--mode` / `--once` / `--no-tools` / `--session` / `--new` flags. Slash commands (`/exit`, `/save`, `/new`, `/mode`, `/clear`, `/sessions`, `/resume`, `/help`). `ChatSession` primitive + per-profile store at `~/.arc/profiles//chat-sessions/`. Supervised-mode confirm gate, read-only / supervised / autonomous permission modes. O(n²) context replay noted as known limitation. ### Phase 5 — Roundtable orchestrator -- [ ] Not started +- [x] **Completed 2026-04-18** — commit `a14bedc`. `RoundtableOrchestrator` driving the existing roundtable hook, adaptive pacing + EMA latency, synthesizer-driven consensus score (tolerant JSON parsing). `StagedWorkflowManager` PLAN/EXEC/VERIFY and `AgentWatchdog` ported from Agent-Forge. `launchMode: "worker"` forced for every participating agent. 
### Phase 6 — `arc roundtable` CLI + MCP tools -- [ ] Not started +- [x] **Completed 2026-04-18** — `arc roundtable` CLI (streaming + JSON), `arc_chat` + `arc_roundtable` MCP tools, 6-tool team contract (`team_say/read/status/done/plan/ask`) with in-memory shared bus. MCP server now exposes 13 tools. 4 new integration tests (49 tests pass). Limitation: team tools use process-wide bus — MCP SDK doesn't surface per-call session id. ### Phase 7 — Dashboard chat view - [ ] Not started diff --git a/package.json b/package.json index cc0b85d..43adc4f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@axiom-labs/arc-cli", - "version": "0.2.0", + "version": "0.4.0", "type": "module", "description": "ARC — Agent Runtime Control. Unified CLI for managing profiles and environments for agent tools (Claude, Gemini, Codex, and more).", "main": "./dist/index.js", diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 6d770de..33c6fd4 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -343,6 +343,49 @@ REPL commands: }, ); + program + .command("roundtable <topic>") + .description("Run a multi-agent roundtable discussion across profiles") + .option("--agents <list>", "Comma-separated profile names (e.g. work-claude,work-codex)") + .option("--rounds <n>", "Number of discussion rounds", "2") + .option("--synthesizer <name>", "Profile that writes the final summary (default: first agent)") + .option("--roles <list>", "Comma-separated roles matching --agents order (advocate|critic|neutral)") + .option("--format <mode>", "Output mode (plain|json)", "plain") + .option("--no-pacing", "Disable adaptive delivery pacing (faster, for tests)") + .addHelpText( + "after", + ` +Examples: + $ arc roundtable "should we rewrite X?" --agents work-claude,work-codex,work-gemini + $ arc roundtable "approach for auth?" --agents a,b --rounds 3 --synthesizer b + $ arc roundtable "risk review" --agents a,b,c --roles advocate,critic,neutral + $ arc roundtable "ship it?" 
--agents a,b --format json +`, + ) + .action( + async ( + topic: string, + opts: { + agents?: string; + rounds?: string; + synthesizer?: string; + roles?: string; + format?: string; + pacing?: boolean; + }, + ) => { + const mod = await import("./commands/roundtable.js"); + await mod.handleRoundtable(topic, { + agents: opts.agents, + rounds: opts.rounds, + synthesizer: opts.synthesizer, + roles: opts.roles, + format: opts.format as "plain" | "json" | undefined, + pacing: opts.pacing, + }); + }, + ); + program .command("set-key [name]") .description("Store an API key for a profile") diff --git a/packages/cli/src/commands/roundtable.ts b/packages/cli/src/commands/roundtable.ts new file mode 100644 index 0000000..f3bd8e0 --- /dev/null +++ b/packages/cli/src/commands/roundtable.ts @@ -0,0 +1,299 @@ +/** + * `arc roundtable` — spawn a multi-agent discussion using the Phase 5 + * orchestrator. Streams per-agent turns + synthesis to stdout with colour- + * coded role headers. + * + * See docs/plans/ai-and-roundtable.md — Phase 6. 
+ */ + +import pc from "picocolors"; +import { + RoundtableOrchestrator, + loadConfig, + type AgentChunk, + type Profile, + type RoundtableAgent, + type RoundtableEvent, + type RoundtableResult, + type RoundtableRole, +} from "@axiom-labs/arc-core"; + +// --------------------------------------------------------------------------- +// Options +// --------------------------------------------------------------------------- + +export interface RoundtableCliOptions { + agents?: string; + rounds?: string | number; + synthesizer?: string; + roles?: string; + format?: "plain" | "json"; + pacing?: boolean; +} + +// --------------------------------------------------------------------------- +// IO helpers +// --------------------------------------------------------------------------- + +function writeText(s: string): void { + process.stdout.write(s); +} +function writeLine(s: string): void { + process.stdout.write(s + "\n"); +} +function printError(s: string): void { + process.stderr.write(pc.red("\u2716") + " " + s + "\n"); +} + +const ROLE_COLORS: Record string> = { + advocate: pc.green, + critic: pc.red, + neutral: pc.cyan, + synthesizer: pc.magenta, +}; + +function colorize(role: RoundtableRole, text: string): string { + const c = ROLE_COLORS[role] ?? pc.white; + return c(text); +} + +// --------------------------------------------------------------------------- +// Parsing helpers +// --------------------------------------------------------------------------- + +function parseRole(val: string, agentIndex: number): RoundtableRole { + const v = val.trim().toLowerCase(); + if (v === "advocate" || v === "critic" || v === "neutral" || v === "synthesizer") { + return v; + } + throw new Error( + `Invalid role "${val}" at agent position ${agentIndex + 1}. 
Expected: advocate | critic | neutral | synthesizer.`, + ); +} + +function defaultRole(index: number): RoundtableRole { + if (index === 0) return "advocate"; + if (index === 1) return "critic"; + return "neutral"; +} + +function parseList(val: string | undefined): string[] { + if (!val) return []; + return val + .split(",") + .map((s) => s.trim()) + .filter(Boolean); +} + +function parseRounds(val: string | number | undefined): number { + if (val === undefined) return 2; + const n = typeof val === "number" ? val : parseInt(val, 10); + if (!Number.isFinite(n) || n < 1) { + throw new Error(`--rounds must be a positive integer (got "${val}")`); + } + return n; +} + +// --------------------------------------------------------------------------- +// Orchestrator option injection — allow tests to override via env hook +// --------------------------------------------------------------------------- + +export interface RoundtableCliDeps { + orchestratorFactory?: () => RoundtableOrchestrator; +} + +// --------------------------------------------------------------------------- +// Entry +// --------------------------------------------------------------------------- + +export async function handleRoundtable( + topic: string, + opts: RoundtableCliOptions, + deps: RoundtableCliDeps = {}, +): Promise { + if (!topic || !topic.trim()) { + printError("A topic is required: arc roundtable --agents a,b,c"); + process.exit(1); + return; + } + + const format = opts.format ?? "plain"; + if (format !== "plain" && format !== "json") { + printError(`--format must be "plain" or "json" (got "${format}")`); + process.exit(1); + return; + } + + // ── Parse agents + roles ────────────────────────────── + const agentNames = parseList(opts.agents); + if (agentNames.length < 2) { + printError( + `Roundtable requires at least 2 agents. 
Pass them via --agents a,b,c (got ${agentNames.length}).`, + ); + process.exit(1); + return; + } + + const rolesList = parseList(opts.roles); + if (rolesList.length > 0 && rolesList.length !== agentNames.length) { + printError( + `--roles must have the same number of entries as --agents (got ${rolesList.length} roles for ${agentNames.length} agents).`, + ); + process.exit(1); + return; + } + + let rounds: number; + try { + rounds = parseRounds(opts.rounds); + } catch (err) { + printError(err instanceof Error ? err.message : String(err)); + process.exit(1); + return; + } + + // ── Load profiles ───────────────────────────────────── + const config = loadConfig(); + const agents: RoundtableAgent[] = []; + for (let i = 0; i < agentNames.length; i++) { + const name = agentNames[i]; + const profile: Profile | undefined = config.profiles[name]; + if (!profile) { + printError( + `Profile "${name}" not found. Run 'arc list' to see available profiles.`, + ); + process.exit(1); + return; + } + if (!profile.tool) { + printError( + `Profile "${name}" has no tool set — cannot participate in roundtable.`, + ); + process.exit(1); + return; + } + let role: RoundtableRole; + try { + role = rolesList.length > 0 ? parseRole(rolesList[i], i) : defaultRole(i); + } catch (err) { + printError(err instanceof Error ? err.message : String(err)); + process.exit(1); + return; + } + agents.push({ profile, role, displayName: name }); + } + + // ── Resolve synthesizer ─────────────────────────────── + let synthesizer: RoundtableAgent = agents[0]; + if (opts.synthesizer) { + const match = agents.find((a) => a.displayName === opts.synthesizer); + if (!match) { + printError( + `--synthesizer "${opts.synthesizer}" must be one of --agents. Got agents: ${agentNames.join(", ")}.`, + ); + process.exit(1); + return; + } + synthesizer = match; + } + + // ── Build orchestrator ──────────────────────────────── + const orchestrator = deps.orchestratorFactory + ? 
deps.orchestratorFactory() + : new RoundtableOrchestrator( + opts.pacing === false + ? { sleep: async () => {} } + : {}, + ); + + // ── Event handler ───────────────────────────────────── + const streaming = format === "plain"; + const onEvent = streaming ? makeStreamingHandler() : undefined; + + // ── Run ────────────────────────────────────────────── + let result: RoundtableResult; + try { + result = await orchestrator.run({ + topic, + agents, + rounds, + synthesizer, + onEvent, + }); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + printError(`Roundtable failed: ${msg}`); + process.exit(1); + return; + } + + // ── Final output ───────────────────────────────────── + if (format === "json") { + writeLine(JSON.stringify(result, null, 2)); + return; + } + + writeLine(""); + writeLine(pc.bold(`Consensus: ${result.consensusScore.toFixed(2)}`)); + writeLine(pc.bold("Summary:")); + writeLine(result.synthesis); + if (result.keyPoints.length > 0) { + writeLine(""); + writeLine(pc.bold("Key points:")); + for (const kp of result.keyPoints) writeLine(` - ${kp}`); + } + writeLine(""); + writeLine( + pc.dim( + `(${result.transcript.length} turns in ${(result.durationMs / 1000).toFixed(1)}s, roundtable id ${result.roundtableId})`, + ), + ); +} + +function makeStreamingHandler(): (evt: RoundtableEvent) => void { + return (evt: RoundtableEvent): void => { + switch (evt.type) { + case "turn-start": { + const header = `\n[${evt.agent}] [${evt.role}] round ${evt.round} \u2500\u2500\u2500\u2500\u2500`; + writeLine(colorize(evt.role, header)); + break; + } + case "turn-chunk": { + const chunk: AgentChunk = evt.chunk; + if (chunk.type === "text") { + writeText(chunk.content); + } else if (chunk.type === "error") { + writeLine("\n" + pc.red(` [error] ${chunk.message}`)); + } + break; + } + case "turn-complete": + writeLine(""); + break; + case "synthesis-start": + writeLine("\n" + pc.bold(pc.magenta(`\u2500\u2500 Synthesis (${evt.agent}) 
\u2500\u2500`))); + break; + case "synthesis-complete": + writeLine(""); + writeLine( + pc.bold( + `Consensus score: ${evt.consensusScore.toFixed(2)}`, + ), + ); + writeLine(pc.bold("Summary:")); + writeLine(evt.summary); + break; + case "phase-change": + // Quiet — users don't want to see every state transition. + break; + case "error": + writeLine( + "\n" + + pc.red( + `[error] ${evt.agent ? `(${evt.agent}) ` : ""}${evt.message}`, + ), + ); + break; + } + }; +} diff --git a/packages/cli/src/version.ts b/packages/cli/src/version.ts index edbab61..fab1818 100644 --- a/packages/cli/src/version.ts +++ b/packages/cli/src/version.ts @@ -1 +1 @@ -export const VERSION = "0.2.0"; +export const VERSION = "0.4.0"; diff --git a/packages/dashboard/public/components/chat.js b/packages/dashboard/public/components/chat.js new file mode 100644 index 0000000..983c94e --- /dev/null +++ b/packages/dashboard/public/components/chat.js @@ -0,0 +1,525 @@ +// ARC Dashboard — Chat View (Phase 7) +// +// Streams chat completions from POST /api/chat/message via per-session +// WebSocket routing. Renders tool calls as collapsible cards, supports +// supervised-mode confirmation modals, and manages a session list sidebar. +// +// See docs/plans/ai-and-roundtable.md — Phase 7 + AD-5. + +import { api } from '../scripts/api.js'; +import { ws } from '../scripts/ws.js'; +import { registerView } from '../scripts/router.js'; +import { escapeHtml } from '../scripts/utils.js'; + +// --------------------------------------------------------------------------- +// Per-tab WebSocket session id +// --------------------------------------------------------------------------- +// +// One uuid per page load; persisted on `window` so the WS negotiation state +// survives view switches. The dashboard-wide `ws` connection is shared, so +// we only need to send `hello` once per tab. 
+ +function uuid() { + if (typeof crypto !== 'undefined' && crypto.randomUUID) return crypto.randomUUID(); + // Lightweight fallback — good enough for an ephemeral session id. + return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => { + const r = (Math.random() * 16) | 0; + return (c === 'x' ? r : (r & 0x3) | 0x8).toString(16); + }); +} + +function getSessionId() { + if (!window.__arcDashboardSessionId) { + window.__arcDashboardSessionId = uuid(); + } + return window.__arcDashboardSessionId; +} + +/** + * Send the WS `hello` message so the server can route `chat-chunk` / + * `chat-confirm-needed` / `chat-done` events back to just this tab. + * Idempotent and re-runs after reconnect. + */ +function ensureHello() { + const sid = getSessionId(); + if (window.__arcDashboardHelloSent) return; + const socket = ws._ws; + if (socket && socket.readyState === 1) { + socket.send(JSON.stringify({ type: 'hello', sessionId: sid })); + window.__arcDashboardHelloSent = true; + } +} + +// Re-send hello whenever the WS (re)connects. +ws.on('connected', () => { + window.__arcDashboardHelloSent = false; + ensureHello(); +}); +ws.on('disconnected', () => { + window.__arcDashboardHelloSent = false; +}); + +// --------------------------------------------------------------------------- +// Bearer token lookup — mutation endpoints require Authorization. 
+// --------------------------------------------------------------------------- + +let cachedToken = null; +async function getToken() { + if (cachedToken) return cachedToken; + try { + const res = await fetch('/api/auth/token'); + if (!res.ok) return null; + const body = await res.json(); + cachedToken = body.token || null; + return cachedToken; + } catch { + return null; + } +} + +async function authFetch(path, init = {}) { + const token = await getToken(); + const headers = new Headers(init.headers || {}); + if (token) headers.set('Authorization', `Bearer ${token}`); + if (init.body && !headers.has('Content-Type')) { + headers.set('Content-Type', 'application/json'); + } + return fetch(path, { ...init, headers }); +} + +// --------------------------------------------------------------------------- +// Chat state (per render cycle) +// --------------------------------------------------------------------------- + +const state = { + profile: null, + mode: 'supervised', + chatSessionId: null, // current loaded chat session (null = new) + messages: [], // rendered messages: { role, content, toolCalls } + streaming: null, // in-flight assistant message being appended + listeners: [], // registered ws handlers, cleared on re-render +}; + +function resetState() { + // Detach any prior ws listeners so we don't duplicate on view switches. + for (const [event, fn] of state.listeners) ws.off(event, fn); + state.listeners = []; + state.streaming = null; + state.messages = []; + state.chatSessionId = null; +} + +function listen(event, handler) { + ws.on(event, handler); + state.listeners.push([event, handler]); +} + +// --------------------------------------------------------------------------- +// Rendering helpers +// --------------------------------------------------------------------------- + +function toolCallCard(tc) { + const summary = typeof tc.input === 'object' + ? JSON.stringify(tc.input).slice(0, 80) + : String(tc.input ?? 
'').slice(0, 80); + const hasResult = tc.result !== undefined || tc.error !== undefined; + const status = tc.error ? 'error' : hasResult ? 'ok' : 'pending'; + const statusLabel = tc.error ? 'ERROR' : hasResult ? 'DONE' : 'RUNNING'; + + const body = tc.error + ? `
${escapeHtml(tc.error)}
` + : `
${escapeHtml(JSON.stringify({ input: tc.input, result: tc.result }, null, 2))}
`; + + return ` +
+
+ +
+ +
`; +} + +function messageRow(msg) { + const roleLabel = msg.role.toUpperCase(); + const tools = (msg.toolCalls || []).map(toolCallCard).join(''); + const content = msg.content + ? `
${escapeHtml(msg.content)}
` + : ''; + return ` +
+
${escapeHtml(roleLabel)}
+ ${content} + ${tools} +
`; +} + +function sessionItem(s, activeId) { + const cls = s.id === activeId ? 'chat-session chat-session--active' : 'chat-session'; + return ` +
+
${escapeHtml(s.summary)}
+
${escapeHtml(String(s.messageCount))} msgs
+ +
`; +} + +function renderMessages() { + const list = document.getElementById('chat-messages'); + if (!list) return; + list.innerHTML = state.messages.map(messageRow).join(''); + list.scrollTop = list.scrollHeight; +} + +function renderSessionList(sessions) { + const el = document.getElementById('chat-sessions'); + if (!el) return; + const items = sessions.map((s) => sessionItem(s, state.chatSessionId)).join(''); + el.innerHTML = items || '
No saved sessions
'; + el.querySelectorAll('.chat-session').forEach((row) => { + row.addEventListener('click', async (e) => { + if (e.target.closest('[data-action="delete-session"]')) return; + const id = row.getAttribute('data-session-id'); + await loadSession(id); + }); + }); + el.querySelectorAll('[data-action="delete-session"]').forEach((btn) => { + btn.addEventListener('click', async (e) => { + e.stopPropagation(); + const id = btn.getAttribute('data-id'); + await deleteSession(id); + }); + }); +} + +// --------------------------------------------------------------------------- +// Chat operations +// --------------------------------------------------------------------------- + +async function refreshSessionList() { + if (!state.profile) return; + try { + const res = await fetch(`/api/chat/sessions?profile=${encodeURIComponent(state.profile)}`); + if (!res.ok) return; + const list = await res.json(); + renderSessionList(Array.isArray(list) ? list : []); + } catch { + /* ignore */ + } +} + +async function loadSession(id) { + if (!state.profile) return; + try { + const res = await fetch(`/api/chat/sessions/${encodeURIComponent(id)}?profile=${encodeURIComponent(state.profile)}`); + if (!res.ok) return; + const body = await res.json(); + state.chatSessionId = body.id; + state.messages = (body.messages || []).map((m) => ({ + role: m.role, + content: m.content, + toolCalls: m.toolCalls, + })); + renderMessages(); + await refreshSessionList(); + } catch { + /* ignore */ + } +} + +async function deleteSession(id) { + if (!state.profile) return; + try { + await authFetch(`/api/chat/sessions/${encodeURIComponent(id)}?profile=${encodeURIComponent(state.profile)}`, { + method: 'DELETE', + }); + if (state.chatSessionId === id) { + state.chatSessionId = null; + state.messages = []; + renderMessages(); + } + await refreshSessionList(); + } catch { + /* ignore */ + } +} + +function newSession() { + state.chatSessionId = null; + state.messages = []; + renderMessages(); + refreshSessionList(); 
+} + +async function sendMessage(text) { + if (!text.trim()) return; + if (!state.profile) { + alert('Pick a profile first'); + return; + } + ensureHello(); + + state.messages.push({ role: 'user', content: text }); + state.streaming = { role: 'assistant', content: '', toolCalls: [] }; + state.messages.push(state.streaming); + renderMessages(); + + try { + const res = await authFetch('/api/chat/message', { + method: 'POST', + body: JSON.stringify({ + sessionId: getSessionId(), + profile: state.profile, + message: text, + mode: state.mode, + chatSessionId: state.chatSessionId || undefined, + }), + }); + if (!res.ok) { + const err = await res.json().catch(() => ({ error: res.statusText })); + state.streaming.content = `[error] ${err.error || res.statusText}`; + state.streaming = null; + renderMessages(); + return; + } + const body = await res.json(); + if (body.chatSessionId) { + state.chatSessionId = body.chatSessionId; + await refreshSessionList(); + } + } catch (err) { + state.streaming.content = `[error] ${err.message}`; + state.streaming = null; + renderMessages(); + } +} + +// --------------------------------------------------------------------------- +// WS event handlers +// --------------------------------------------------------------------------- + +function onChunk(data) { + if (!state.streaming || !data) return; + + if (data.type === 'text') { + state.streaming.content += data.content || ''; + renderMessages(); + } else if (data.type === 'thinking') { + // Ignore in the visible transcript for now — keep chat focused. 
+ } else if (data.type === 'tool_call') { + state.streaming.toolCalls.push({ + id: data.id, + name: data.tool, + input: data.input, + }); + renderMessages(); + } else if (data.type === 'tool_result') { + const tc = state.streaming.toolCalls.find((t) => t.id === data.id); + if (tc) { + const r = data.result || {}; + if (r.ok === false) { + tc.error = r.error || 'tool error'; + } else { + tc.result = r.output; + } + renderMessages(); + } + } +} + +function onDone() { + state.streaming = null; + refreshSessionList(); +} + +function onError(data) { + if (state.streaming) { + state.streaming.content += `\n[error] ${data?.message || 'unknown'}`; + renderMessages(); + } +} + +function onConfirmNeeded(data) { + if (!data || !data.tokenId) return; + showConfirmModal(data.tokenId, data.prompt || 'Run tool?'); +} + +// --------------------------------------------------------------------------- +// Confirmation modal +// --------------------------------------------------------------------------- + +function showConfirmModal(tokenId, prompt) { + const existing = document.getElementById('chat-confirm-modal'); + if (existing) existing.remove(); + + const overlay = document.createElement('div'); + overlay.id = 'chat-confirm-modal'; + overlay.className = 'modal-overlay chat-confirm'; + overlay.innerHTML = ` + `; + document.body.appendChild(overlay); + + const respond = async (allow) => { + overlay.remove(); + try { + await authFetch('/api/chat/confirm', { + method: 'POST', + body: JSON.stringify({ sessionId: getSessionId(), tokenId, allow }), + }); + } catch { + /* ignore — server will auto-deny after timeout */ + } + }; + + overlay.querySelector('[data-confirm="allow"]').addEventListener('click', () => respond(true)); + overlay.querySelector('[data-confirm="deny"]').addEventListener('click', () => respond(false)); +} + +// --------------------------------------------------------------------------- +// Event wiring for interactive UI +// 
--------------------------------------------------------------------------- + +function wireUi() { + const sendBtn = document.getElementById('chat-send'); + const input = document.getElementById('chat-input'); + const profileSelect = document.getElementById('chat-profile'); + const modeSelect = document.getElementById('chat-mode'); + const newBtn = document.getElementById('chat-new'); + + sendBtn?.addEventListener('click', () => { + const text = input.value; + input.value = ''; + sendMessage(text); + }); + + input?.addEventListener('keydown', (e) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + const text = input.value; + input.value = ''; + sendMessage(text); + } + }); + + profileSelect?.addEventListener('change', () => { + state.profile = profileSelect.value; + refreshSessionList(); + }); + + modeSelect?.addEventListener('change', () => { + state.mode = modeSelect.value; + }); + + newBtn?.addEventListener('click', () => { + newSession(); + }); + + // Tool-call expand/collapse via event delegation. + document.getElementById('chat-messages')?.addEventListener('click', (e) => { + const toggle = e.target.closest('[data-action="toggle-tool"]'); + if (!toggle) return; + const tool = toggle.closest('.chat-tool'); + if (!tool) return; + const detail = tool.querySelector('.chat-tool__detail'); + const caret = tool.querySelector('.chat-tool__caret'); + const isHidden = detail.hasAttribute('hidden'); + if (isHidden) { + detail.removeAttribute('hidden'); + caret.textContent = '▾'; + } else { + detail.setAttribute('hidden', ''); + caret.textContent = '▸'; + } + }); +} + +// --------------------------------------------------------------------------- +// View render +// --------------------------------------------------------------------------- + +async function render() { + resetState(); + ensureHello(); + + // Listen for per-session chat events. These are filtered server-side via + // broadcastTo() so only this tab's chunks arrive. 
+ listen('chat-chunk', onChunk); + listen('chat-done', onDone); + listen('chat-error', onError); + listen('chat-confirm-needed', onConfirmNeeded); + + // Load profile list for the dropdown. + let profiles = []; + try { + profiles = await api.profiles(); + } catch { + profiles = []; + } + if (profiles.length > 0 && !state.profile) { + const active = profiles.find((p) => p.active); + state.profile = active ? active.name : profiles[0].name; + } + + const profileOptions = profiles + .map((p) => ``) + .join(''); + + // Kick off a session-list refresh after render commits. + setTimeout(() => { + wireUi(); + refreshSessionList(); + }, 0); + + return ` +
+

Chat

+ IN-APP ASSISTANT +
+
+ +
+
+ + +
+
+
+ + +
+
+
`; +} + +registerView('chat', render); diff --git a/packages/dashboard/public/index.html b/packages/dashboard/public/index.html index 95a9eba..9842ef8 100644 --- a/packages/dashboard/public/index.html +++ b/packages/dashboard/public/index.html @@ -38,6 +38,7 @@ +