From 5a91060e69e27418cd5725bc5f6d975ad46dee61 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 28 Mar 2026 16:57:02 -0700 Subject: [PATCH] Harden retry implementation with edge case tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-ran thinktank with 5 Opus agents (600s timeout, all completed) to review the #77 retry implementation. Agent #5 added 8 edge case tests: - All agents failed → retry all - latest.json missing/invalid → clear error - All retried agents fail again → handled gracefully - Stale test results removed for retried agents - Single failed agent → others preserved - loadLatestResult null safety Original had 1/5 agents complete due to 300s timeout. With 600s, 5/5 completed, 2 passed tests. Agent #5 chosen over #3 (more granular unit tests, exported merge function). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/commands/run.test.ts | 156 ++++++++++++++++++++++++++++++++++++++- src/commands/run.ts | 55 +++++++++----- 2 files changed, 190 insertions(+), 21 deletions(-) diff --git a/src/commands/run.test.ts b/src/commands/run.test.ts index 3bacd7d..d2ddde8 100644 --- a/src/commands/run.test.ts +++ b/src/commands/run.test.ts @@ -1,8 +1,12 @@ import assert from "node:assert/strict"; +import { mkdir, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; import { afterEach, describe, it } from "node:test"; -import type { AgentResult, EnsembleResult, RunOptions } from "../types.js"; +import type { AgentResult, EnsembleResult, RunOptions, TestResult } from "../types.js"; import { findFailedAgents, + loadLatestResult, makeResultFilename, mergeRetryResults, preflightValidation, @@ -253,4 +257,154 @@ describe("mergeRetryResults", () => { const merged = mergeRetryResults(original, retried); assert.equal(merged.length, original.agents.length); }); + + it("replaces all agents when all were retried", () => { + const original = makeResult({ + agents: [ + makeAgent({ id: 1, status: "error", diff: "", filesChanged: [] }), + makeAgent({ id: 2, status: "timeout", diff: "", filesChanged: [] }), + makeAgent({ id: 3, status: "error", diff: "", filesChanged: [] }), + ], + }); + const retried = [ + makeAgent({ id: 1, status: "success", diff: "diff 1" }), + makeAgent({ id: 2, status: "success", diff: "diff 2" }), + makeAgent({ id: 3, status: "error", diff: "" }), + ]; + + const merged = mergeRetryResults(original, retried); + + assert.equal(merged.length, 3); + assert.equal(merged[0].status, "success"); + assert.equal(merged[0].diff, "diff 1"); + assert.equal(merged[1].status, "success"); + assert.equal(merged[1].diff, "diff 2"); + assert.equal(merged[2].status, "error"); // retry also failed + }); +}); + +describe("loadLatestResult", () => { + it("returns null when latest.json does not exist", async () => { + // Save and restore cwd to point at a temp dir with no .thinktank/ + const originalCwd = process.cwd(); + const tempDir = join(tmpdir(), `thinktank-test-load-${Date.now()}`); + await mkdir(tempDir, { recursive: true }); + try { + process.chdir(tempDir); + const result = await loadLatestResult(); + assert.equal(result, null); + } finally { + process.chdir(originalCwd); + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it("returns null when latest.json contains invalid JSON", async () => { + const originalCwd = process.cwd(); + const tempDir = join(tmpdir(), `thinktank-test-load-${Date.now()}`); + const ttDir = join(tempDir, ".thinktank"); + await mkdir(ttDir, { recursive: true }); + try { + await writeFile(join(ttDir, "latest.json"), "not valid json{{{"); + process.chdir(tempDir); + const result = await loadLatestResult(); + assert.equal(result, null); + } finally { + process.chdir(originalCwd); + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it("loads a valid latest.json", async () => { + const originalCwd = process.cwd(); + const tempDir = join(tmpdir(), `thinktank-test-load-${Date.now()}`); + const ttDir = join(tempDir, ".thinktank"); + await mkdir(ttDir, { recursive: true }); + const expected = makeResult(); + try { + await writeFile(join(ttDir, "latest.json"), JSON.stringify(expected)); + process.chdir(tempDir); + const result = await loadLatestResult(); + assert.ok(result); + assert.equal(result.prompt, expected.prompt); + assert.equal(result.agents.length, expected.agents.length); + } finally { + process.chdir(originalCwd); + await rm(tempDir, { recursive: true, force: true }); + } + }); +}); + +describe("retry edge cases", () => { + it("findFailedAgents returns all agents when every agent failed", () => { + const result = makeResult({ + agents: [ + makeAgent({ id: 1, status: "error" }), + makeAgent({ id: 2, status: "timeout" }), + makeAgent({ id: 3, status: "error" }), + ], + }); + const failed = findFailedAgents(result); + assert.equal(failed.length, 3); + assert.deepEqual(failed.map((a) => a.id).sort(), [1, 2, 3]); + }); + + it("mergeRetryResults handles retry where all retried agents fail again", () => { + const original = makeResult({ + agents: [ + makeAgent({ id: 1, status: "success", diff: "good diff" }), + makeAgent({ id: 2, status: "error", diff: "" }), + makeAgent({ id: 3, status: "timeout", diff: "" }), + ], + }); + const retried = [ + makeAgent({ id: 2, status: "timeout", diff: "" }), + makeAgent({ id: 3, status: "error", diff: "" }), + ]; + + const merged = mergeRetryResults(original, retried); + + // Agent 1 preserved, agents 2 and 3 replaced with still-failed results + assert.equal(merged[0].status, "success"); + assert.equal(merged[0].diff, "good diff"); + assert.equal(merged[1].status, "timeout"); + assert.equal(merged[2].status, "error"); + }); + + it("stale test results are removed for retried agents even without --test-cmd", () => { + // Simulate: previous run had test results for all agents, + // agents 2 and 3 failed and are being retried. + // After merge, test results for agents 2 and 3 should be gone + // since their code changed. + const previousTests: TestResult[] = [ + { agentId: 1, passed: true, output: "ok", exitCode: 0 }, + { agentId: 2, passed: false, output: "fail", exitCode: 1 }, + { agentId: 3, passed: false, output: "timeout", exitCode: 1 }, + ]; + + const retriedIds = new Set([2, 3]); + // This mirrors the fixed logic in retry(): filter out stale tests + const filtered = previousTests.filter((t) => !retriedIds.has(t.agentId)); + + assert.equal(filtered.length, 1); + assert.equal(filtered[0].agentId, 1); + }); + + it("mergeRetryResults with single failed agent preserves others", () => { + const original = makeResult({ + agents: [ + makeAgent({ id: 1, status: "success", diff: "diff1" }), + makeAgent({ id: 2, status: "success", diff: "diff2" }), + makeAgent({ id: 3, status: "error", diff: "" }), + ], + }); + const retried = [makeAgent({ id: 3, status: "success", diff: "new diff3" })]; + + const merged = mergeRetryResults(original, retried); + + assert.equal(merged[0].diff, "diff1"); + assert.equal(merged[1].diff, "diff2"); + assert.equal(merged[2].status, "success"); + assert.equal(merged[2].diff, "new diff3"); + }); }); diff --git a/src/commands/run.ts b/src/commands/run.ts index 40e1a28..88e0ca2 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -132,12 +132,21 @@ export async function retry(opts: RunOptions): Promise { }; process.on("SIGINT", handleSigint); - const worktreeResults = await Promise.all( - failedIds.map((id) => createWorktree(id).then((path) => ({ id, path }))), - ); - for (const wt of worktreeResults) { - worktrees.push(wt); - console.log(` Agent #${wt.id}: ${wt.path}`); + try { + const worktreeResults = await Promise.all( + failedIds.map((id) => createWorktree(id).then((path) => ({ id, path }))), + ); + for (const wt of worktreeResults) { + worktrees.push(wt); + console.log(` Agent #${wt.id}: ${wt.path}`); + } + } catch (err) { + // Clean up any worktrees that were created before the failure + console.error(" Failed to create worktrees — cleaning up..."); + await Promise.all(worktrees.map(({ path }) => removeWorktree(path).catch(() => {}))); + await cleanupBranches().catch(() => {}); + process.removeListener("SIGINT", handleSigint); + throw err; } console.log(); @@ -169,8 +178,10 @@ export async function retry(opts: RunOptions): Promise { // Phase 3: Merge retried agents back into original results const mergedAgents = mergeRetryResults(previous, retriedAgents); - // Phase 4: Run tests on ALL agents (retried get fresh tests, keep old test results for others) - let testResults = [...previous.tests]; + // Phase 4: Run tests — always discard stale test results for retried agents, + // since they now have different code regardless of whether --test-cmd is provided. + const retriedIdSet = new Set(failedIds); + const testResults = previous.tests.filter((t) => !retriedIdSet.has(t.agentId)); if (opts.testCmd) { console.log(` Running tests: ${opts.testCmd}`); @@ -181,10 +192,6 @@ export async function retry(opts: RunOptions): Promise { runTests(id, opts.testCmd!, path, testTimeoutMs), ); const retryTestResults = await Promise.all(retryTestPromises); - - // Replace test results for retried agents - const retriedIds = new Set(failedIds); - testResults = testResults.filter((t) => !retriedIds.has(t.agentId)); testResults.push(...retryTestResults); for (const test of retryTestResults) { @@ -274,14 +281,22 @@ export async function run(opts: RunOptions): Promise { }; process.on("SIGINT", handleSigint); - const worktreeResults = await Promise.all( - Array.from({ length: opts.attempts }, (_, i) => - createWorktree(i + 1).then((path) => ({ id: i + 1, path })), - ), - ); - for (const wt of worktreeResults) { - worktrees.push(wt); - console.log(` Agent #${wt.id}: ${wt.path}`); + try { + const worktreeResults = await Promise.all( + Array.from({ length: opts.attempts }, (_, i) => + createWorktree(i + 1).then((path) => ({ id: i + 1, path })), + ), + ); + for (const wt of worktreeResults) { + worktrees.push(wt); + console.log(` Agent #${wt.id}: ${wt.path}`); + } + } catch (err) { + console.error(" Failed to create worktrees — cleaning up..."); + await Promise.all(worktrees.map(({ path }) => removeWorktree(path).catch(() => {}))); + await cleanupBranches().catch(() => {}); + process.removeListener("SIGINT", handleSigint); + throw err; } console.log();