From aa8ee6386b90b2e4d26efcc427fe9da82747b13e Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Mon, 25 May 2026 05:16:13 -0600 Subject: [PATCH] fix(lint): biome format pass after Phase 2.8 + self-improving-loop merges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI lint check failed on main after the in-sandbox executor + composition example landed. Auto-fixable formatting only — biome check --write applied 9 file fixes (line-length wrapping in test stubs, arrow-callback spacing). No logic changes. 303 tests still pass. --- .../self-improving-loop.ts | 101 ++++++++---- src/mcp/bin-helpers.ts | 5 +- src/mcp/in-process-executor.ts | 82 +++++++--- src/mcp/index.ts | 22 +-- src/mcp/local-harness.ts | 29 ++-- src/mcp/worktree.ts | 34 ++-- tests/mcp/in-process-detect.test.ts | 2 +- tests/mcp/in-process-executor.test.ts | 150 +++++++++++++++--- tests/mcp/local-harness.test.ts | 7 +- 9 files changed, 313 insertions(+), 119 deletions(-) diff --git a/examples/self-improving-loop/self-improving-loop.ts b/examples/self-improving-loop/self-improving-loop.ts index 1b6440a..89c78ad 100644 --- a/examples/self-improving-loop/self-improving-loop.ts +++ b/examples/self-improving-loop/self-improving-loop.ts @@ -14,13 +14,13 @@ // See README.md for the conceptual map. import { - runJudge, - runMultishot, type JudgeConfig, type MultishotMessage, type MultishotPersona, type MultishotResult, type MultishotShape, + runJudge, + runMultishot, } from '@tangle-network/agent-eval/multishot' import type { AgentProfile } from '@tangle-network/sandbox' @@ -48,11 +48,16 @@ function installMockRouter(replies: ScriptedReply[]): () => void { return { ok: true, status: 200, - json: async () => ({ choices: [{ message }], usage: { prompt_tokens: 100, completion_tokens: 200 } }), + json: async () => ({ + choices: [{ message }], + usage: { prompt_tokens: 100, completion_tokens: 200 }, + }), text: async () => 'ok', } as Response }) as typeof fetch - return () => { global.fetch = original } + return () => { + global.fetch = original + } } // ── 2. A tiny domain — viral content scoring ───────────────────────────────── @@ -70,8 +75,10 @@ const PERSONAS: FounderPersona[] = [ ] const shape: MultishotShape = { - buildOpener: (p) => `I'm ${p.name}, ${p.domain}. Help me write content that actually gets engagement.`, - buildDriverSystemPrompt: (p) => `You are ${p.name} working in ${p.domain}. Push back on vague advice; demand concrete posts.`, + buildOpener: (p) => + `I'm ${p.name}, ${p.domain}. Help me write content that actually gets engagement.`, + buildDriverSystemPrompt: (p) => + `You are ${p.name} working in ${p.domain}. Push back on vague advice; demand concrete posts.`, } // ── 3. Baseline AgentProfile (v0) — intentionally weak ────────────────────── @@ -84,20 +91,24 @@ const baseline: AgentProfile = { // ── 4. Judge — scores how concrete + audience-fit the agent's output is ──── const dims = [ - { key: 'concreteness', description: 'Real posts vs vague descriptions (0=descriptions, 10=ready-to-post)' }, - { key: 'audience_fit', description: 'Tailored to the persona\'s domain (0=generic, 10=spot-on)' }, + { + key: 'concreteness', + description: 'Real posts vs vague descriptions (0=descriptions, 10=ready-to-post)', + }, + { key: 'audience_fit', description: "Tailored to the persona's domain (0=generic, 10=spot-on)" }, ] as const -const conversationJudge: JudgeConfig<{ transcript: MultishotMessage[]; persona: FounderPersona }> = { - name: 'content-quality', - systemPrompt: 'You are a strict judge. Output ONLY valid JSON.', - dimensions: [...dims], - buildPrompt: ({ transcript, persona }) => - `Score this agent's output for ${persona.name} (${persona.domain}). 0-10 each.\n\n${transcript - .filter((m) => m.role !== 'tool') - .map((m) => `${m.role}: ${m.content}`) - .join('\n\n')}\n\nRespond with ONLY: {"concreteness":N,"audience_fit":N,"notes":"..."}`, -} +const conversationJudge: JudgeConfig<{ transcript: MultishotMessage[]; persona: FounderPersona }> = + { + name: 'content-quality', + systemPrompt: 'You are a strict judge. Output ONLY valid JSON.', + dimensions: [...dims], + buildPrompt: ({ transcript, persona }) => + `Score this agent's output for ${persona.name} (${persona.domain}). 0-10 each.\n\n${transcript + .filter((m) => m.role !== 'tool') + .map((m) => `${m.role}: ${m.content}`) + .join('\n\n')}\n\nRespond with ONLY: {"concreteness":N,"audience_fit":N,"notes":"..."}`, + } // ── 5. Analyst — reads v0 transcripts + scores, proposes a mutation ──────── @@ -106,13 +117,16 @@ interface AnalystFinding { proposedMutation: string } -async function runAnalyst(v0Runs: Array<{ persona: FounderPersona; result: MultishotResult; score: { composite: number } }>): Promise { +async function runAnalyst( + v0Runs: Array<{ persona: FounderPersona; result: MultishotResult; score: { composite: number } }>, +): Promise { // In a real product the analyst would be an LLM call (@tangle-network/agent-runtime/analyst-loop). // Here we synthesise the finding deterministically so the demo is reproducible. const worst = [...v0Runs].sort((a, b) => a.score.composite - b.score.composite)[0] return { rootCause: `${worst.persona.name} run scored ${worst.score.composite.toFixed(1)} — output was too generic, no concrete posts.`, - proposedMutation: 'Always include 2 ready-to-post examples tailored to the persona\'s exact domain (use specific verbs, numbers, and audience language).', + proposedMutation: + "Always include 2 ready-to-post examples tailored to the persona's exact domain (use specific verbs, numbers, and audience language).", } } @@ -128,10 +142,19 @@ function applyMutation(base: AgentProfile, mutation: string): AgentProfile { // ── 6. Gate — promote v1 only if it beats v0 by >= delta ─────────────────── -function gate(v0Mean: number, v1Mean: number, requiredDelta = 0.5): { ship: boolean; delta: number; reason: string } { +function gate( + v0Mean: number, + v1Mean: number, + requiredDelta = 0.5, +): { ship: boolean; delta: number; reason: string } { const delta = v1Mean - v0Mean - if (delta >= requiredDelta) return { ship: true, delta, reason: `v1 beat v0 by ${delta.toFixed(2)} (>= ${requiredDelta})` } - return { ship: false, delta, reason: `v1 only beat v0 by ${delta.toFixed(2)} (< ${requiredDelta})` } + if (delta >= requiredDelta) + return { ship: true, delta, reason: `v1 beat v0 by ${delta.toFixed(2)} (>= ${requiredDelta})` } + return { + ship: false, + delta, + reason: `v1 only beat v0 by ${delta.toFixed(2)} (< ${requiredDelta})`, + } } // ── 7. Wire it together ───────────────────────────────────────────────────── @@ -140,7 +163,11 @@ async function runVariant(profile: AgentProfile, scriptedReplies: ScriptedReply[ const restore = installMockRouter(scriptedReplies) process.env.TANGLE_API_KEY ??= 'test-key' try { - const runs: Array<{ persona: FounderPersona; result: MultishotResult; score: { composite: number } }> = [] + const runs: Array<{ + persona: FounderPersona + result: MultishotResult + score: { composite: number } + }> = [] for (const persona of PERSONAS) { const result = await runMultishot({ profile, persona, shape, maxTurns: 1 }) const score = await runJudge(conversationJudge, { transcript: result.transcript, persona }) @@ -168,7 +195,8 @@ async function main(): Promise { console.log('— Phase 1: v0 baseline run') const v0 = await runVariant(baseline, v0Replies) console.log(` v0 mean: ${v0.mean.toFixed(2)} (over ${v0.runs.length} personas)`) - for (const r of v0.runs) console.log(` ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`) + for (const r of v0.runs) + console.log(` ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`) console.log('\n— Phase 2: analyst proposes mutation') const finding = await runAnalyst(v0.runs) @@ -180,26 +208,37 @@ async function main(): Promise { // v1 replies: now concrete + audience-fit const v1Replies: ScriptedReply[] = [ - { text: 'Here are 2 tweets for Maya: "Just opened our 50th retailer in TX — onboarding playbook is up on Notion." / "Why we said no to Kroger: margin math + ops bandwidth."' }, + { + text: 'Here are 2 tweets for Maya: "Just opened our 50th retailer in TX — onboarding playbook is up on Notion." / "Why we said no to Kroger: margin math + ops bandwidth."', + }, { text: '{"concreteness":8,"audience_fit":9,"notes":"concrete + retail-specific"}' }, - { text: 'Here are 2 LinkedIn posts for Theo: "We cut MRR churn 32% by routing every renewal through a forecasted-risk score." / "Why your B2B PLG playbook stalls at $5M ARR (and what to do)."' }, + { + text: 'Here are 2 LinkedIn posts for Theo: "We cut MRR churn 32% by routing every renewal through a forecasted-risk score." / "Why your B2B PLG playbook stalls at $5M ARR (and what to do)."', + }, { text: '{"concreteness":9,"audience_fit":8,"notes":"B2B-specific metrics"}' }, - { text: 'Two TikTok hooks for Aurora: "POV: you finally found the foundation that matches NC15 + has SPF" / "What I wish I knew before booking my first brand deal at 50k followers."' }, + { + text: 'Two TikTok hooks for Aurora: "POV: you finally found the foundation that matches NC15 + has SPF" / "What I wish I knew before booking my first brand deal at 50k followers."', + }, { text: '{"concreteness":8,"audience_fit":9,"notes":"creator-economy-specific"}' }, ] console.log('\n— Phase 4: v1 re-run') const v1Result = await runVariant(v1, v1Replies) console.log(` v1 mean: ${v1Result.mean.toFixed(2)} (over ${v1Result.runs.length} personas)`) - for (const r of v1Result.runs) console.log(` ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`) + for (const r of v1Result.runs) + console.log(` ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`) console.log('\n— Phase 5: gate decision') const verdict = gate(v0.mean, v1Result.mean) - console.log(` ship: ${verdict.ship} | delta: ${verdict.delta >= 0 ? '+' : ''}${verdict.delta.toFixed(2)} | ${verdict.reason}`) + console.log( + ` ship: ${verdict.ship} | delta: ${verdict.delta >= 0 ? '+' : ''}${verdict.delta.toFixed(2)} | ${verdict.reason}`, + ) if (verdict.ship) { console.log('\n═══ PROMOTED v1 → production ═══') - console.log('In a real product the new systemPrompt would land in the production composer\nand subsequent chat turns would use it. See agent-eval-adoption skill Phase 3.') + console.log( + 'In a real product the new systemPrompt would land in the production composer\nand subsequent chat turns would use it. See agent-eval-adoption skill Phase 3.', + ) } else { console.log('\n═══ HELD — keep v0 ═══') } diff --git a/src/mcp/bin-helpers.ts b/src/mcp/bin-helpers.ts index 1d2e6b4..b37f8ab 100644 --- a/src/mcp/bin-helpers.ts +++ b/src/mcp/bin-helpers.ts @@ -82,7 +82,10 @@ const KNOWN_HARNESSES: ReadonlyArray = ['claude', 'codex', 'openco function parseHarnesses(raw: string | undefined): ReadonlyArray | undefined { if (!raw) return undefined - const parts = raw.split(',').map((s) => s.trim()).filter(Boolean) + const parts = raw + .split(',') + .map((s) => s.trim()) + .filter(Boolean) if (parts.length === 0) return undefined for (const part of parts) { if (!KNOWN_HARNESSES.includes(part as LocalHarness)) { diff --git a/src/mcp/in-process-executor.ts b/src/mcp/in-process-executor.ts index 8a905d0..1c2494c 100644 --- a/src/mcp/in-process-executor.ts +++ b/src/mcp/in-process-executor.ts @@ -37,12 +37,12 @@ import { randomUUID } from 'node:crypto' import type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox' import type { LoopSandboxClient, LoopSandboxPlacement } from '../loops' import type { DelegationExecutor } from './executor' -import { runLocalHarness, type LocalHarness } from './local-harness' +import { type LocalHarness, runLocalHarness } from './local-harness' import { captureWorktreeDiff, createWorktree, - removeWorktree, type GitRunner, + removeWorktree, type WorktreeHandle, } from './worktree' @@ -85,7 +85,11 @@ export interface InProcessExecutorOptions { * Test seam — override the post-check runner. Defaults to spawning the * configured `testCmd` / `typecheckCmd` via `child_process.spawn`. */ - runPostCheck?: (cmd: string, cwd: string, signal?: AbortSignal) => Promise<{ exitCode: number; stdout: string; stderr: string }> + runPostCheck?: ( + cmd: string, + cwd: string, + signal?: AbortSignal, + ) => Promise<{ exitCode: number; stdout: string; stderr: string }> } /** @experimental */ @@ -125,7 +129,10 @@ const DEFAULT_POSTCHECK_TIMEOUT_MS = 2 * 60 * 1000 * @experimental */ export function createInProcessExecutor(options: InProcessExecutorOptions): DelegationExecutor { - const harnesses = options.harnesses && options.harnesses.length > 0 ? [...options.harnesses] : (['claude'] as const) + const harnesses = + options.harnesses && options.harnesses.length > 0 + ? [...options.harnesses] + : (['claude'] as const) const runHarness = options.runHarness ?? runLocalHarness const runPostCheck = options.runPostCheck ?? defaultRunPostCheck @@ -145,10 +152,21 @@ export function createInProcessExecutor(options: InProcessExecutorOptions): Dele id: `in-process-${runId}`, __inProcess: { runId, harness }, // eslint-disable-next-line require-yield - async *streamPrompt(this: VirtualSandbox, message: string | unknown[], promptOpts?: { signal?: AbortSignal }): AsyncGenerator { - const taskPrompt = typeof message === 'string' - ? message - : message.map((p) => (typeof p === 'object' && p && 'text' in p ? String((p as { text: unknown }).text) : '')).join('\n') + async *streamPrompt( + this: VirtualSandbox, + message: string | unknown[], + promptOpts?: { signal?: AbortSignal }, + ): AsyncGenerator { + const taskPrompt = + typeof message === 'string' + ? message + : message + .map((p) => + typeof p === 'object' && p && 'text' in p + ? String((p as { text: unknown }).text) + : '', + ) + .join('\n') let worktree: WorktreeHandle | undefined try { @@ -198,18 +216,22 @@ export function createInProcessExecutor(options: InProcessExecutorOptions): Dele // Optional post-checks. Each runs in the WORKTREE so it sees the // harness's edits. const testCheck = options.testCmd - ? await runPostCheck(options.testCmd, worktree.path, promptOpts?.signal).catch((err) => ({ - exitCode: -1, - stdout: '', - stderr: err instanceof Error ? err.message : String(err), - })) + ? await runPostCheck(options.testCmd, worktree.path, promptOpts?.signal).catch( + (err) => ({ + exitCode: -1, + stdout: '', + stderr: err instanceof Error ? err.message : String(err), + }), + ) : { exitCode: 0, stdout: '', stderr: '' } const typecheckCheck = options.typecheckCmd - ? await runPostCheck(options.typecheckCmd, worktree.path, promptOpts?.signal).catch((err) => ({ - exitCode: -1, - stdout: '', - stderr: err instanceof Error ? err.message : String(err), - })) + ? await runPostCheck(options.typecheckCmd, worktree.path, promptOpts?.signal).catch( + (err) => ({ + exitCode: -1, + stdout: '', + stderr: err instanceof Error ? err.message : String(err), + }), + ) : { exitCode: 0, stdout: '', stderr: '' } const coderOutput = { @@ -224,9 +246,10 @@ export function createInProcessExecutor(options: InProcessExecutorOptions): Dele output: tail(typecheckCheck.stderr || typecheckCheck.stdout, 4000), }, diffStats: diff.stats, - reviewerNotes: harnessResult.exitCode === 0 - ? undefined - : `harness ${harness} exited ${harnessResult.exitCode}${harnessResult.timedOut ? ' (timed out)' : ''}`, + reviewerNotes: + harnessResult.exitCode === 0 + ? undefined + : `harness ${harness} exited ${harnessResult.exitCode}${harnessResult.timedOut ? ' (timed out)' : ''}`, } // The terminal event the coderProfile parser looks for. @@ -286,10 +309,16 @@ async function defaultRunPostCheck( const child = spawn('sh', ['-c', cmd], { cwd, stdio: 'pipe' }) let stdout = '' let stderr = '' - child.stdout?.on('data', (c) => { stdout += String(c) }) - child.stderr?.on('data', (c) => { stderr += String(c) }) + child.stdout?.on('data', (c) => { + stdout += String(c) + }) + child.stderr?.on('data', (c) => { + stderr += String(c) + }) if (signal) { - const onAbort = () => { if (!child.killed) child.kill('SIGTERM') } + const onAbort = () => { + if (!child.killed) child.kill('SIGTERM') + } if (signal.aborted) onAbort() else signal.addEventListener('abort', onAbort, { once: true }) } @@ -299,7 +328,10 @@ async function defaultRunPostCheck( if (typeof (killTimer as { unref?: () => void }).unref === 'function') { ;(killTimer as { unref: () => void }).unref() } - child.on('error', (err) => { clearTimeout(killTimer); reject(err) }) + child.on('error', (err) => { + clearTimeout(killTimer) + reject(err) + }) child.on('close', (code) => { clearTimeout(killTimer) resolve({ exitCode: code ?? -1, stdout, stderr }) diff --git a/src/mcp/index.ts b/src/mcp/index.ts index 520eba0..6ab9b2b 100644 --- a/src/mcp/index.ts +++ b/src/mcp/index.ts @@ -29,6 +29,8 @@ export type { SiblingSandboxExecutorOptions, } from './executor' export { createFleetWorkspaceExecutor, createSiblingSandboxExecutor } from './executor' +export type { FeedbackEvent, FeedbackStore } from './feedback-store' +export { eventToSnapshot, InMemoryFeedbackStore } from './feedback-store' export type { InProcessExecutorDescribePlacement, InProcessExecutorOptions, @@ -36,17 +38,6 @@ export type { export { createInProcessExecutor } from './in-process-executor' export type { LocalHarness, LocalHarnessResult, RunLocalHarnessOptions } from './local-harness' export { runLocalHarness } from './local-harness' -export type { - CreateWorktreeOptions, - DiffOptions, - DiffResult, - GitRunner, - RemoveWorktreeOptions, - WorktreeHandle, -} from './worktree' -export { captureWorktreeDiff, createWorktree, removeWorktree } from './worktree' -export type { FeedbackEvent, FeedbackStore } from './feedback-store' -export { eventToSnapshot, InMemoryFeedbackStore } from './feedback-store' export { mcpToolsForRuntimeMcp, mcpToolsForRuntimeMcpSubset } from './openai-tools' export type { JsonRpcMessage, @@ -130,3 +121,12 @@ export type { ResearchOutputShape, ResearchSource, } from './types' +export type { + CreateWorktreeOptions, + DiffOptions, + DiffResult, + GitRunner, + RemoveWorktreeOptions, + WorktreeHandle, +} from './worktree' +export { captureWorktreeDiff, createWorktree, removeWorktree } from './worktree' diff --git a/src/mcp/local-harness.ts b/src/mcp/local-harness.ts index cfc415e..2353518 100644 --- a/src/mcp/local-harness.ts +++ b/src/mcp/local-harness.ts @@ -16,7 +16,7 @@ * - enforce a wall-clock timeout */ -import { spawn, type ChildProcess } from 'node:child_process' +import { type ChildProcess, spawn } from 'node:child_process' /** Local coding harness available inside the sandbox. */ export type LocalHarness = 'claude' | 'codex' | 'opencode' @@ -57,11 +57,15 @@ export interface RunLocalHarnessOptions { * Test seam — inject a custom spawner so unit tests can mock the * subprocess without touching the OS. Defaults to node's `child_process.spawn`. */ - spawn?: (command: string, args: ReadonlyArray, opts: { - cwd: string - env: NodeJS.ProcessEnv - stdio: 'pipe' - }) => ChildProcess + spawn?: ( + command: string, + args: ReadonlyArray, + opts: { + cwd: string + env: NodeJS.ProcessEnv + stdio: 'pipe' + }, + ) => ChildProcess } /** @experimental */ @@ -129,12 +133,13 @@ export function runLocalHarness(options: RunLocalHarnessOptions): Promise 0 - ? setTimeout(() => { - timedOut = true - if (!child.killed) child.kill('SIGTERM') - }, timeoutMs) - : null + const timer = + timeoutMs > 0 + ? setTimeout(() => { + timedOut = true + if (!child.killed) child.kill('SIGTERM') + }, timeoutMs) + : null if (timer && typeof (timer as { unref?: () => void }).unref === 'function') { ;(timer as { unref: () => void }).unref() } diff --git a/src/mcp/worktree.ts b/src/mcp/worktree.ts index 09fb554..af85acf 100644 --- a/src/mcp/worktree.ts +++ b/src/mcp/worktree.ts @@ -86,8 +86,12 @@ async function runGitAsync( const proc = spawn('git', args, { cwd, stdio: 'pipe' }) let stdout = '' let stderr = '' - proc.stdout?.on('data', (c) => { stdout += String(c) }) - proc.stderr?.on('data', (c) => { stderr += String(c) }) + proc.stdout?.on('data', (c) => { + stdout += String(c) + }) + proc.stderr?.on('data', (c) => { + stderr += String(c) + }) proc.on('error', reject) proc.on('close', (code) => resolve({ stdout, stderr, exitCode: code ?? -1 })) }) @@ -98,7 +102,9 @@ function ensureGitOk( result: { stdout: string; stderr: string; exitCode: number }, ): void { if (result.exitCode !== 0) { - throw new Error(`worktree: git ${step} failed (exit ${result.exitCode}): ${result.stderr.slice(0, 400)}`) + throw new Error( + `worktree: git ${step} failed (exit ${result.exitCode}): ${result.stderr.slice(0, 400)}`, + ) } } @@ -129,7 +135,11 @@ export async function captureWorktreeDiff(options: DiffOptions): Promise undefined, - ) + await runGitAsync( + ['branch', '-D', options.worktree.branch], + options.repoRoot, + options.runGit, + ).catch(() => undefined) } // Always attempt branch removal — the worktree-remove sometimes leaves // the branch behind even when the directory is gone. - await runGitAsync(['branch', '-D', options.worktree.branch], options.repoRoot, options.runGit).catch( - () => undefined, - ) + await runGitAsync( + ['branch', '-D', options.worktree.branch], + options.repoRoot, + options.runGit, + ).catch(() => undefined) } diff --git a/tests/mcp/in-process-detect.test.ts b/tests/mcp/in-process-detect.test.ts index 8539b3e..98f9de3 100644 --- a/tests/mcp/in-process-detect.test.ts +++ b/tests/mcp/in-process-detect.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it, vi } from 'vitest' import { detectExecutor } from '../../src/mcp/bin-helpers' -const stubClient = { create: vi.fn(async () => ({ id: 'sibling-stub' } as never)) } +const stubClient = { create: vi.fn(async () => ({ id: 'sibling-stub' }) as never) } describe('detectExecutor — in-process selection', () => { it('selects in-process when AGENT_RUNTIME_IN_SANDBOX=1', async () => { diff --git a/tests/mcp/in-process-executor.test.ts b/tests/mcp/in-process-executor.test.ts index d91499c..1b4ca39 100644 --- a/tests/mcp/in-process-executor.test.ts +++ b/tests/mcp/in-process-executor.test.ts @@ -37,7 +37,8 @@ describe('createInProcessExecutor', () => { const state: FakeGitState = { worktreesCreated: [], worktreesRemoved: [], - diffPatch: 'diff --git a/util.ts b/util.ts\n+++ b/util.ts\n@@ +1 @@\n+export const add = (a,b)=>a+b\n', + diffPatch: + 'diff --git a/util.ts b/util.ts\n+++ b/util.ts\n@@ +1 @@\n+export const add = (a,b)=>a+b\n', diffShortstat: ' 1 file changed, 1 insertion(+), 0 deletions(-)\n', baseSha: 'abc1234', } @@ -57,12 +58,26 @@ describe('createInProcessExecutor', () => { const box = await exec.client.create() const events: Array<{ type: string; data: Record }> = [] - for await (const event of (box as unknown as { streamPrompt: (m: string) => AsyncGenerator<{ type: string; data: Record }> }).streamPrompt('add util.ts exporting add(a,b)')) { + for await (const event of ( + box as unknown as { + streamPrompt: (m: string) => AsyncGenerator<{ type: string; data: Record }> + } + ).streamPrompt('add util.ts exporting add(a,b)')) { events.push(event) } - expect(events.map((e) => e.type)).toEqual(['in_process.harness.started', 'in_process.harness.ended', 'result']) - const result = events[2]!.data.result as { branch: string; patch: string; testResult: { passed: boolean }; typecheckResult: { passed: boolean }; diffStats: { filesChanged: number; insertions: number; deletions: number } } + expect(events.map((e) => e.type)).toEqual([ + 'in_process.harness.started', + 'in_process.harness.ended', + 'result', + ]) + const result = events[2]!.data.result as { + branch: string + patch: string + testResult: { passed: boolean } + typecheckResult: { passed: boolean } + diffStats: { filesChanged: number; insertions: number; deletions: number } + } expect(result.patch).toContain('util.ts') expect(result.diffStats).toEqual({ filesChanged: 1, insertions: 1, deletions: 0 }) expect(result.testResult.passed).toBe(true) @@ -72,9 +87,20 @@ describe('createInProcessExecutor', () => { }) it('rotates harnesses round-robin across create() calls', async () => { - const state: FakeGitState = { worktreesCreated: [], worktreesRemoved: [], diffPatch: '', diffShortstat: '', baseSha: 'sha' } + const state: FakeGitState = { + worktreesCreated: [], + worktreesRemoved: [], + diffPatch: '', + diffShortstat: '', + baseSha: 'sha', + } const runHarness = vi.fn(async () => ({ - exitCode: 0, stdout: '', stderr: '', killedBySignal: null, durationMs: 1, timedOut: false, + exitCode: 0, + stdout: '', + stderr: '', + killedBySignal: null, + durationMs: 1, + timedOut: false, })) const exec = createInProcessExecutor({ repoRoot: '/w', @@ -85,7 +111,9 @@ describe('createInProcessExecutor', () => { for (let i = 0; i < 6; i++) { const box = await exec.client.create() - for await (const _ of (box as unknown as { streamPrompt: (m: string) => AsyncGenerator }).streamPrompt('task ' + i)) { + for await (const _ of ( + box as unknown as { streamPrompt: (m: string) => AsyncGenerator } + ).streamPrompt('task ' + i)) { // drain } } @@ -94,7 +122,13 @@ describe('createInProcessExecutor', () => { }) it('runs testCmd + typecheckCmd against the worktree and folds results into CoderOutput', async () => { - const state: FakeGitState = { worktreesCreated: [], worktreesRemoved: [], diffPatch: '', diffShortstat: '', baseSha: 'sha' } + const state: FakeGitState = { + worktreesCreated: [], + worktreesRemoved: [], + diffPatch: '', + diffShortstat: '', + baseSha: 'sha', + } const runPostCheck = vi.fn(async (cmd: string) => ({ exitCode: cmd === 'pnpm test' ? 0 : 1, stdout: '', @@ -106,16 +140,30 @@ describe('createInProcessExecutor', () => { testCmd: 'pnpm test', typecheckCmd: 'pnpm typecheck', runGit: makeFakeGit(state), - runHarness: vi.fn(async () => ({ exitCode: 0, stdout: '', stderr: '', killedBySignal: null, durationMs: 1, timedOut: false })), + runHarness: vi.fn(async () => ({ + exitCode: 0, + stdout: '', + stderr: '', + killedBySignal: null, + durationMs: 1, + timedOut: false, + })), runPostCheck, }) const box = await exec.client.create() const events: Array<{ type: string; data: Record }> = [] - for await (const event of (box as unknown as { streamPrompt: (m: string) => AsyncGenerator<{ type: string; data: Record }> }).streamPrompt('go')) { + for await (const event of ( + box as unknown as { + streamPrompt: (m: string) => AsyncGenerator<{ type: string; data: Record }> + } + ).streamPrompt('go')) { events.push(event) } - const result = events.find((e) => e.type === 'result')!.data.result as { testResult: { passed: boolean }; typecheckResult: { passed: boolean; output: string } } + const result = events.find((e) => e.type === 'result')!.data.result as { + testResult: { passed: boolean } + typecheckResult: { passed: boolean; output: string } + } expect(result.testResult.passed).toBe(true) expect(result.typecheckResult.passed).toBe(false) expect(result.typecheckResult.output).toContain('type error') @@ -123,51 +171,101 @@ describe('createInProcessExecutor', () => { }) it('marks result with reviewerNotes when harness exits non-zero', async () => { - const state: FakeGitState = { worktreesCreated: [], worktreesRemoved: [], diffPatch: '', diffShortstat: '', baseSha: 'sha' } + const state: FakeGitState = { + worktreesCreated: [], + worktreesRemoved: [], + diffPatch: '', + diffShortstat: '', + baseSha: 'sha', + } const exec = createInProcessExecutor({ repoRoot: '/w', runGit: makeFakeGit(state), - runHarness: vi.fn(async () => ({ exitCode: 2, stdout: '', stderr: 'fail', killedBySignal: null, durationMs: 1, timedOut: false })), + runHarness: vi.fn(async () => ({ + exitCode: 2, + stdout: '', + stderr: 'fail', + killedBySignal: null, + durationMs: 1, + timedOut: false, + })), }) const box = await exec.client.create() const events: Array<{ type: string; data: Record }> = [] - for await (const event of (box as unknown as { streamPrompt: (m: string) => AsyncGenerator<{ type: string; data: Record }> }).streamPrompt('x')) { + for await (const event of ( + box as unknown as { + streamPrompt: (m: string) => AsyncGenerator<{ type: string; data: Record }> + } + ).streamPrompt('x')) { events.push(event) } - const result = events.find((e) => e.type === 'result')!.data.result as { reviewerNotes?: string } + const result = events.find((e) => e.type === 'result')!.data.result as { + reviewerNotes?: string + } expect(result.reviewerNotes).toContain('claude exited 2') }) it('cleans up worktree even when streamPrompt is aborted mid-flight', async () => { - const state: FakeGitState = { worktreesCreated: [], worktreesRemoved: [], diffPatch: '', diffShortstat: '', baseSha: 'sha' } + const state: FakeGitState = { + worktreesCreated: [], + worktreesRemoved: [], + diffPatch: '', + diffShortstat: '', + baseSha: 'sha', + } const exec = createInProcessExecutor({ repoRoot: '/w', runGit: makeFakeGit(state), - runHarness: vi.fn(async () => { throw new Error('boom') }), + runHarness: vi.fn(async () => { + throw new Error('boom') + }), }) const box = await exec.client.create() - await expect((async () => { - for await (const _ of (box as unknown as { streamPrompt: (m: string) => AsyncGenerator }).streamPrompt('x')) { - // drain - } - })()).rejects.toThrow(/boom/) + await expect( + (async () => { + for await (const _ of ( + box as unknown as { streamPrompt: (m: string) => AsyncGenerator } + ).streamPrompt('x')) { + // drain + } + })(), + ).rejects.toThrow(/boom/) expect(state.worktreesCreated.length).toBe(1) expect(state.worktreesRemoved.length).toBe(1) }) it('describePlacement carries harness + worktreePath after streamPrompt runs', async () => { - const state: FakeGitState = { worktreesCreated: [], worktreesRemoved: [], diffPatch: '', diffShortstat: '', baseSha: 'sha' } + const state: FakeGitState = { + worktreesCreated: [], + worktreesRemoved: [], + diffPatch: '', + diffShortstat: '', + baseSha: 'sha', + } const exec = createInProcessExecutor({ repoRoot: '/w', harnesses: ['codex'], runGit: makeFakeGit(state), - runHarness: vi.fn(async () => ({ exitCode: 0, stdout: '', stderr: '', killedBySignal: null, durationMs: 1, timedOut: false })), + runHarness: vi.fn(async () => ({ + exitCode: 0, + stdout: '', + stderr: '', + killedBySignal: null, + durationMs: 1, + timedOut: false, + })), }) const box = await exec.client.create() - for await (const _ of (box as unknown as { streamPrompt: (m: string) => AsyncGenerator }).streamPrompt('x')) { + for await (const _ of ( + box as unknown as { streamPrompt: (m: string) => AsyncGenerator } + ).streamPrompt('x')) { // drain so streamPrompt populates the worktree handle } - const placement = exec.client.describePlacement?.(box) as { harness?: string; worktreePath?: string; sandboxId?: string } + const placement = exec.client.describePlacement?.(box) as { + harness?: string + worktreePath?: string + sandboxId?: string + } expect(placement?.harness).toBe('codex') expect(placement?.worktreePath).toMatch(/\.coder-variants/) expect(placement?.sandboxId).toMatch(/^in-process-/) diff --git a/tests/mcp/local-harness.test.ts b/tests/mcp/local-harness.test.ts index 060aadf..30d77e9 100644 --- a/tests/mcp/local-harness.test.ts +++ b/tests/mcp/local-harness.test.ts @@ -69,7 +69,8 @@ describe('runLocalHarness', () => { harness: 'opencode', cwd: '/tmp/wt', taskPrompt: 'x', - spawn: () => makeFakeChild({ emitErrorBeforeClose: new Error('ENOENT: opencode not on PATH') }), + spawn: () => + makeFakeChild({ emitErrorBeforeClose: new Error('ENOENT: opencode not on PATH') }), }), ).rejects.toThrow(/ENOENT/) }) @@ -117,7 +118,9 @@ describe('runLocalHarness', () => { }) it('builds CLI-correct args for each known harness', async () => { - const spawnSpy = vi.fn((_cmd: string, _args: ReadonlyArray) => makeFakeChild({ exitCode: 0 })) + const spawnSpy = vi.fn((_cmd: string, _args: ReadonlyArray) => + makeFakeChild({ exitCode: 0 }), + ) for (const harness of ['claude', 'codex', 'opencode'] as const) { await runLocalHarness({ harness, cwd: '/tmp/wt', taskPrompt: 'go', spawn: spawnSpy }) }