diff --git a/src/cli.ts b/src/cli.ts index 3fcf3fe..da7c313 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -7,7 +7,7 @@ import { compare } from "./commands/compare.js"; import { type ConfigAction, config } from "./commands/config.js"; import { evaluate } from "./commands/evaluate.js"; import { list } from "./commands/list.js"; -import { run } from "./commands/run.js"; +import { retry, run } from "./commands/run.js"; import { stats } from "./commands/stats.js"; import { undo } from "./commands/undo.js"; import { loadConfig } from "./utils/config.js"; @@ -48,15 +48,8 @@ program .option("--no-color", "Disable colored output") .option("--output-format ", "Output format: text (default) or json", "text") .option("--verbose", "Show detailed output from each agent") + .option("--retry", "Re-run only failed/timed-out agents from the last run") .action(async (promptArg: string | undefined, opts) => { - const prompt = resolvePrompt(promptArg, opts.file); - - const attempts = parseInt(opts.attempts, 10); - if (Number.isNaN(attempts) || attempts < 1 || attempts > 20) { - console.error("Error: --attempts must be a number between 1 and 20"); - process.exit(1); - } - const testTimeout = parseInt(opts.testTimeout, 10); if (Number.isNaN(testTimeout) || testTimeout < 10 || testTimeout > 600) { console.error("Error: --test-timeout must be a number between 10 and 600 seconds"); @@ -92,6 +85,33 @@ program process.exit(1); } + // --retry: re-run only failed agents from last run, ignore --attempts and prompt + if (opts.retry) { + await retry({ + prompt: "", // ignored — loaded from previous result + attempts: 0, // ignored — determined by failed agent count + testCmd: opts.testCmd, + testTimeout, + timeout, + model: opts.model, + threshold, + runner: opts.runner, + scoring: opts.scoring, + verbose: opts.verbose ?? 
/**
 * Build an `AgentResult` fixture describing a successful agent that changed
 * one file. Any field can be replaced through `overrides`.
 */
function makeAgent(overrides: Partial<AgentResult> = {}): AgentResult {
  return {
    id: 1,
    worktree: "/tmp/thinktank-agent-1",
    status: "success",
    exitCode: 0,
    duration: 5000,
    output: "",
    diff: "diff --git a/file.ts b/file.ts\n+added line",
    filesChanged: ["file.ts"],
    linesAdded: 1,
    linesRemoved: 0,
    ...overrides,
  };
}

/**
 * Build an `EnsembleResult` fixture with three agents: #1 succeeded,
 * #2 errored and #3 timed out. Any field can be replaced through `overrides`.
 */
function makeResult(overrides: Partial<EnsembleResult> = {}): EnsembleResult {
  return {
    prompt: "fix the bug",
    model: "sonnet",
    timestamp: "2026-03-28T10:00:00.000Z",
    scoring: "copeland",
    agents: [
      makeAgent({ id: 1, status: "success" }),
      makeAgent({ id: 2, status: "error", exitCode: 1, diff: "", filesChanged: [] }),
      makeAgent({ id: 3, status: "timeout", exitCode: 1, diff: "", filesChanged: [] }),
    ],
    tests: [],
    convergence: [],
    recommended: 1,
    scores: [],
    ...overrides,
  };
}

describe("findFailedAgents", () => {
  it("returns agents with error status", () => {
    const result = makeResult();
    const failed = findFailedAgents(result);
    const ids = failed.map((a) => a.id);
    assert.ok(ids.includes(2));
  });

  it("returns agents with timeout status", () => {
    const result = makeResult();
    const failed = findFailedAgents(result);
    const ids = failed.map((a) => a.id);
    assert.ok(ids.includes(3));
  });

  it("does not return successful agents", () => {
    const result = makeResult();
    const failed = findFailedAgents(result);
    const ids = failed.map((a) => a.id);
    assert.ok(!ids.includes(1));
  });

  it("returns empty array when all agents succeeded", () => {
    const result = makeResult({
      agents: [makeAgent({ id: 1, status: "success" }), makeAgent({ id: 2, status: "success" })],
    });
    const failed = findFailedAgents(result);
    assert.equal(failed.length, 0);
  });

  it("returns all agents when all failed", () => {
    const result = makeResult({
      agents: [
        makeAgent({ id: 1, status: "error" }),
        makeAgent({ id: 2, status: "timeout" }),
        makeAgent({ id: 3, status: "error" }),
      ],
    });
    const failed = findFailedAgents(result);
    assert.equal(failed.length, 3);
  });
});

describe("mergeRetryResults", () => {
  it("replaces failed agents with retried results", () => {
    const original = makeResult();
    const retried = [
      makeAgent({ id: 2, status: "success", diff: "new diff for 2", filesChanged: ["a.ts"] }),
      makeAgent({ id: 3, status: "success", diff: "new diff for 3", filesChanged: ["b.ts"] }),
    ];

    const merged = mergeRetryResults(original, retried);

    assert.equal(merged.length, 3);
    assert.equal(merged[0].id, 1);
    assert.equal(merged[0].status, "success");
    assert.equal(merged[1].id, 2);
    assert.equal(merged[1].status, "success");
    assert.equal(merged[1].diff, "new diff for 2");
    assert.equal(merged[2].id, 3);
    assert.equal(merged[2].status, "success");
    assert.equal(merged[2].diff, "new diff for 3");
  });

  it("preserves successful agents unchanged", () => {
    const original = makeResult();
    const retried = [makeAgent({ id: 2, status: "success" })];

    const merged = mergeRetryResults(original, retried);

    assert.equal(merged[0].id, 1);
    assert.equal(merged[0].status, "success");
    assert.equal(merged[0].diff, original.agents[0].diff);
    // Agents that were not retried are passed through by reference.
    assert.equal(merged[0], original.agents[0]);
    assert.equal(merged[2], original.agents[2]);
  });

  it("handles retry where agent still fails", () => {
    const original = makeResult();
    const retried = [makeAgent({ id: 2, status: "error", diff: "" })];

    const merged = mergeRetryResults(original, retried);

    assert.equal(merged[1].id, 2);
    assert.equal(merged[1].status, "error");
  });

  it("returns same count as original agents", () => {
    const original = makeResult();
    const retried = [
      makeAgent({ id: 2, status: "success" }),
      makeAgent({ id: 3, status: "success" }),
    ];

    const merged = mergeRetryResults(original, retried);
    assert.equal(merged.length, original.agents.length);
  });

  it("ignores retried agents whose id is not in the original run", () => {
    const original = makeResult();
    const retried = [makeAgent({ id: 99, status: "success" })];

    const merged = mergeRetryResults(original, retried);

    assert.deepEqual(
      merged.map((a) => a.id),
      [1, 2, 3],
    );
  });

  it("does not mutate the original agent list", () => {
    const original = makeResult();
    const before = [...original.agents];

    mergeRetryResults(original, [makeAgent({ id: 2, status: "success" })]);

    assert.deepEqual(original.agents, before);
  });
});
/**
 * Select the agents of a previous run that did not finish successfully.
 *
 * @param result - Result of an earlier ensemble run.
 * @returns The agents whose status is `"error"` or `"timeout"`, in their original order.
 */
export function findFailedAgents(result: EnsembleResult): AgentResult[] {
  return result.agents.filter((a) => a.status === "error" || a.status === "timeout");
}

/**
 * Merge retried agent results back into the agent list of the original run.
 *
 * The returned array has the same length and order as `original.agents`.
 * An agent is replaced when `retriedAgents` contains an entry with the same id;
 * every other agent is passed through untouched. Retried entries whose id does
 * not occur in the original run are ignored. Neither input is mutated.
 *
 * @param original - Result of the run being retried.
 * @param retriedAgents - Fresh results for the agents that were re-run.
 * @returns A new agent list with retried entries substituted in place.
 */
export function mergeRetryResults(
  original: EnsembleResult,
  retriedAgents: AgentResult[],
): AgentResult[] {
  const retriedById = new Map<AgentResult["id"], AgentResult>();
  for (const agent of retriedAgents) {
    // Keep the first entry on duplicate ids, matching what a linear search would pick.
    if (!retriedById.has(agent.id)) {
      retriedById.set(agent.id, agent);
    }
  }
  return original.agents.map((agent) => retriedById.get(agent.id) ?? agent);
}

/**
 * Re-run only the agents that failed or timed out in the most recent run.
 *
 * The prompt and model are taken from the previous result, not from `opts`.
 * From `opts` this function reads `runner`, `timeout`, `verbose`, `testCmd`,
 * `testTimeout`, `threshold`, `scoring` and `outputFormat`; the whole object
 * (with the previous prompt substituted) is also passed to pre-flight validation.
 *
 * Exits the process with code 1 when there is no previous result, the runner is
 * unknown or unavailable, or pre-flight validation fails. Returns without doing
 * anything when the previous run had no failed agents.
 *
 * @param opts - Options for the retry run.
 */
export async function retry(opts: RunOptions): Promise<void> {
  // Load previous result
  const previous = await loadLatestResult();
  if (!previous) {
    console.error(" No previous run found. Run 'thinktank run' first.");
    process.exit(1);
  }

  const failed = findFailedAgents(previous);
  if (failed.length === 0) {
    console.log(" All agents succeeded in the last run — nothing to retry.");
    return;
  }

  const failedIds = failed.map((a) => a.id);
  console.log();
  console.log(
    ` Retrying ${failed.length} failed agent(s): ${failedIds.map((id) => `#${id}`).join(", ")}`,
  );
  console.log(` Prompt: ${previous.prompt}`);
  console.log(` Model: ${previous.model}`);
  console.log();

  // Resolve runner
  const runner = opts.runner ? getRunner(opts.runner) : getDefaultRunner();
  if (!runner) {
    console.error(` Unknown runner: ${opts.runner}`);
    console.error(" Available runners: claude-code");
    process.exit(1);
  }

  const isAvailable = await runner.available();
  if (!isAvailable) {
    console.error(
      ` Runner "${runner.name}" is not available. Is ${runner.description} installed?`,
    );
    process.exit(1);
  }

  // Pre-flight validation (use previous prompt)
  const preflightError = await preflightValidation({
    ...opts,
    prompt: previous.prompt,
  });
  if (preflightError) {
    console.error(` ${preflightError}`);
    process.exit(1);
  }

  // Clean up leftovers from earlier runs; failures here are deliberately ignored.
  // NOTE(review): if cleanupBranches() also removes the worktrees of agents that
  // succeeded last time, the apply instructions printed below may point at
  // worktrees that no longer exist — confirm against its implementation.
  await cleanupBranches().catch(() => {});

  // Every worktree is registered here as soon as it exists, so an interrupt that
  // arrives while others are still being created can clean up the finished ones.
  const createdWorktrees: Array<{ id: number; path: string }> = [];

  const handleSigint = (): void => {
    console.log("\n\n Interrupted — cleaning up worktrees...");
    void Promise.all(createdWorktrees.map(({ path }) => removeWorktree(path).catch(() => {})))
      .then(() => cleanupBranches().catch(() => {}))
      .then(() => process.exit(130));
  };
  process.on("SIGINT", handleSigint);

  try {
    // Phase 1: Create worktrees only for failed agents
    console.log(" Creating worktrees for failed agents...");
    const worktrees = await Promise.all(
      failedIds.map(async (id) => {
        const worktree = { id, path: await createWorktree(id) };
        createdWorktrees.push(worktree);
        return worktree;
      }),
    );
    for (const wt of worktrees) {
      console.log(` Agent #${wt.id}: ${wt.path}`);
    }
    console.log();

    // Phase 2: Re-run failed agents with original prompt
    console.log(` Re-running ${failed.length} agent(s) in parallel (${runner.name})...`);
    console.log();

    const retriedAgents: AgentResult[] = await Promise.all(
      worktrees.map(({ id, path }) =>
        runner.run(id, {
          prompt: previous.prompt,
          worktreePath: path,
          model: previous.model,
          timeout: opts.timeout,
          verbose: opts.verbose,
        }),
      ),
    );

    for (const agent of retriedAgents) {
      const icon = agent.status === "success" ? "✓" : agent.status === "timeout" ? "⏱" : "✗";
      const files = agent.filesChanged.length;
      console.log(
        ` Agent #${agent.id}: ${icon} ${agent.status} — ${files} files changed in ${Math.round(agent.duration / 1000)}s`,
      );
    }
    console.log();

    // Phase 3: Merge retried agents back into original results
    const mergedAgents = mergeRetryResults(previous, retriedAgents);

    // Phase 4: Run tests on the retried agents only; agents that were not
    // retried keep the test results recorded in the previous run.
    let testResults = [...previous.tests];

    const testCmd = opts.testCmd;
    if (testCmd) {
      console.log(` Running tests: ${testCmd}`);
      const testTimeoutMs = opts.testTimeout * 1000;

      const retryTestResults = await Promise.all(
        worktrees.map(({ id, path }) => runTests(id, testCmd, path, testTimeoutMs)),
      );

      // Drop stale results for retried agents, then append the fresh ones.
      const retriedIds = new Set(failedIds);
      testResults = testResults.filter((t) => !retriedIds.has(t.agentId));
      testResults.push(...retryTestResults);

      for (const test of retryTestResults) {
        const icon = test.passed ? "✓" : "✗";
        console.log(` Agent #${test.agentId}: ${icon} tests ${test.passed ? "passed" : "failed"}`);
      }
      console.log();
    }

    // Phase 5: Convergence analysis on full merged set
    const convergence = analyzeConvergence(mergedAgents, opts.threshold);

    // Phase 6: Recommendation — both scorers run; opts.scoring picks the winner.
    const { recommended: weightedRec, scores } = recommend(mergedAgents, testResults, convergence);
    const copeland = copelandRecommend(mergedAgents, testResults, convergence);

    const recommended = opts.scoring === "copeland" ? copeland.recommended : weightedRec;

    // Build result object
    const result: EnsembleResult = {
      prompt: previous.prompt,
      model: previous.model,
      timestamp: new Date().toISOString(),
      scoring: opts.scoring,
      agents: mergedAgents,
      tests: testResults,
      convergence,
      recommended,
      scores,
      copelandScores: copeland.scores,
    };

    // Display results
    if (opts.outputFormat === "json") {
      console.log(JSON.stringify(result));
    } else {
      displayResults(result);
      displayApplyInstructions(result);
    }

    // Save result
    await saveResult(result);
  } finally {
    // Always detach the handler, including when a step above throws.
    process.removeListener("SIGINT", handleSigint);
  }
}