Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 155 additions & 1 deletion src/commands/run.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import assert from "node:assert/strict";
import { mkdir, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, it } from "node:test";
import type { AgentResult, EnsembleResult, RunOptions } from "../types.js";
import type { AgentResult, EnsembleResult, RunOptions, TestResult } from "../types.js";
import {
findFailedAgents,
loadLatestResult,
makeResultFilename,
mergeRetryResults,
preflightValidation,
Expand Down Expand Up @@ -253,4 +257,154 @@ describe("mergeRetryResults", () => {
const merged = mergeRetryResults(original, retried);
assert.equal(merged.length, original.agents.length);
});

it("replaces all agents when all were retried", () => {
const original = makeResult({
agents: [
makeAgent({ id: 1, status: "error", diff: "", filesChanged: [] }),
makeAgent({ id: 2, status: "timeout", diff: "", filesChanged: [] }),
makeAgent({ id: 3, status: "error", diff: "", filesChanged: [] }),
],
});
const retried = [
makeAgent({ id: 1, status: "success", diff: "diff 1" }),
makeAgent({ id: 2, status: "success", diff: "diff 2" }),
makeAgent({ id: 3, status: "error", diff: "" }),
];

const merged = mergeRetryResults(original, retried);

assert.equal(merged.length, 3);
assert.equal(merged[0].status, "success");
assert.equal(merged[0].diff, "diff 1");
assert.equal(merged[1].status, "success");
assert.equal(merged[1].diff, "diff 2");
assert.equal(merged[2].status, "error"); // retry also failed
});
});

describe("loadLatestResult", () => {
it("returns null when latest.json does not exist", async () => {
// Save and restore cwd to point at a temp dir with no .thinktank/
const originalCwd = process.cwd();
const tempDir = join(tmpdir(), `thinktank-test-load-${Date.now()}`);
await mkdir(tempDir, { recursive: true });
try {
process.chdir(tempDir);
const result = await loadLatestResult();
assert.equal(result, null);
} finally {
process.chdir(originalCwd);
await rm(tempDir, { recursive: true, force: true });
}
});

it("returns null when latest.json contains invalid JSON", async () => {
const originalCwd = process.cwd();
const tempDir = join(tmpdir(), `thinktank-test-load-${Date.now()}`);
const ttDir = join(tempDir, ".thinktank");
await mkdir(ttDir, { recursive: true });
try {
await writeFile(join(ttDir, "latest.json"), "not valid json{{{");
process.chdir(tempDir);
const result = await loadLatestResult();
assert.equal(result, null);
} finally {
process.chdir(originalCwd);
await rm(tempDir, { recursive: true, force: true });
}
});

it("loads a valid latest.json", async () => {
const originalCwd = process.cwd();
const tempDir = join(tmpdir(), `thinktank-test-load-${Date.now()}`);
const ttDir = join(tempDir, ".thinktank");
await mkdir(ttDir, { recursive: true });
const expected = makeResult();
try {
await writeFile(join(ttDir, "latest.json"), JSON.stringify(expected));
process.chdir(tempDir);
const result = await loadLatestResult();
assert.ok(result);
assert.equal(result.prompt, expected.prompt);
assert.equal(result.agents.length, expected.agents.length);
} finally {
process.chdir(originalCwd);
await rm(tempDir, { recursive: true, force: true });
}
});
});

describe("retry edge cases", () => {
it("findFailedAgents returns all agents when every agent failed", () => {
const result = makeResult({
agents: [
makeAgent({ id: 1, status: "error" }),
makeAgent({ id: 2, status: "timeout" }),
makeAgent({ id: 3, status: "error" }),
],
});
const failed = findFailedAgents(result);
assert.equal(failed.length, 3);
assert.deepEqual(failed.map((a) => a.id).sort(), [1, 2, 3]);
});

it("mergeRetryResults handles retry where all retried agents fail again", () => {
const original = makeResult({
agents: [
makeAgent({ id: 1, status: "success", diff: "good diff" }),
makeAgent({ id: 2, status: "error", diff: "" }),
makeAgent({ id: 3, status: "timeout", diff: "" }),
],
});
const retried = [
makeAgent({ id: 2, status: "timeout", diff: "" }),
makeAgent({ id: 3, status: "error", diff: "" }),
];

const merged = mergeRetryResults(original, retried);

// Agent 1 preserved, agents 2 and 3 replaced with still-failed results
assert.equal(merged[0].status, "success");
assert.equal(merged[0].diff, "good diff");
assert.equal(merged[1].status, "timeout");
assert.equal(merged[2].status, "error");
});

it("stale test results are removed for retried agents even without --test-cmd", () => {
// Simulate: previous run had test results for all agents,
// agents 2 and 3 failed and are being retried.
// After merge, test results for agents 2 and 3 should be gone
// since their code changed.
const previousTests: TestResult[] = [
{ agentId: 1, passed: true, output: "ok", exitCode: 0 },
{ agentId: 2, passed: false, output: "fail", exitCode: 1 },
{ agentId: 3, passed: false, output: "timeout", exitCode: 1 },
];

const retriedIds = new Set([2, 3]);
// This mirrors the fixed logic in retry(): filter out stale tests
const filtered = previousTests.filter((t) => !retriedIds.has(t.agentId));

assert.equal(filtered.length, 1);
assert.equal(filtered[0].agentId, 1);
});

it("mergeRetryResults with single failed agent preserves others", () => {
const original = makeResult({
agents: [
makeAgent({ id: 1, status: "success", diff: "diff1" }),
makeAgent({ id: 2, status: "success", diff: "diff2" }),
makeAgent({ id: 3, status: "error", diff: "" }),
],
});
const retried = [makeAgent({ id: 3, status: "success", diff: "new diff3" })];

const merged = mergeRetryResults(original, retried);

assert.equal(merged[0].diff, "diff1");
assert.equal(merged[1].diff, "diff2");
assert.equal(merged[2].status, "success");
assert.equal(merged[2].diff, "new diff3");
});
});
55 changes: 35 additions & 20 deletions src/commands/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,21 @@ export async function retry(opts: RunOptions): Promise<void> {
};
process.on("SIGINT", handleSigint);

const worktreeResults = await Promise.all(
failedIds.map((id) => createWorktree(id).then((path) => ({ id, path }))),
);
for (const wt of worktreeResults) {
worktrees.push(wt);
console.log(` Agent #${wt.id}: ${wt.path}`);
try {
const worktreeResults = await Promise.all(
failedIds.map((id) => createWorktree(id).then((path) => ({ id, path }))),
);
for (const wt of worktreeResults) {
worktrees.push(wt);
console.log(` Agent #${wt.id}: ${wt.path}`);
}
} catch (err) {
// Clean up any worktrees that were created before the failure
console.error(" Failed to create worktrees — cleaning up...");
await Promise.all(worktrees.map(({ path }) => removeWorktree(path).catch(() => {})));
await cleanupBranches().catch(() => {});
process.removeListener("SIGINT", handleSigint);
throw err;
}
console.log();

Expand Down Expand Up @@ -169,8 +178,10 @@ export async function retry(opts: RunOptions): Promise<void> {
// Phase 3: Merge retried agents back into original results
const mergedAgents = mergeRetryResults(previous, retriedAgents);

// Phase 4: Run tests on ALL agents (retried get fresh tests, keep old test results for others)
let testResults = [...previous.tests];
// Phase 4: Run tests — always discard stale test results for retried agents,
// since they now have different code regardless of whether --test-cmd is provided.
const retriedIdSet = new Set(failedIds);
const testResults = previous.tests.filter((t) => !retriedIdSet.has(t.agentId));

if (opts.testCmd) {
console.log(` Running tests: ${opts.testCmd}`);
Expand All @@ -181,10 +192,6 @@ export async function retry(opts: RunOptions): Promise<void> {
runTests(id, opts.testCmd!, path, testTimeoutMs),
);
const retryTestResults = await Promise.all(retryTestPromises);

// Replace test results for retried agents
const retriedIds = new Set(failedIds);
testResults = testResults.filter((t) => !retriedIds.has(t.agentId));
testResults.push(...retryTestResults);

for (const test of retryTestResults) {
Expand Down Expand Up @@ -274,14 +281,22 @@ export async function run(opts: RunOptions): Promise<void> {
};
process.on("SIGINT", handleSigint);

const worktreeResults = await Promise.all(
Array.from({ length: opts.attempts }, (_, i) =>
createWorktree(i + 1).then((path) => ({ id: i + 1, path })),
),
);
for (const wt of worktreeResults) {
worktrees.push(wt);
console.log(` Agent #${wt.id}: ${wt.path}`);
try {
const worktreeResults = await Promise.all(
Array.from({ length: opts.attempts }, (_, i) =>
createWorktree(i + 1).then((path) => ({ id: i + 1, path })),
),
);
for (const wt of worktreeResults) {
worktrees.push(wt);
console.log(` Agent #${wt.id}: ${wt.path}`);
}
} catch (err) {
console.error(" Failed to create worktrees — cleaning up...");
await Promise.all(worktrees.map(({ path }) => removeWorktree(path).catch(() => {})));
await cleanupBranches().catch(() => {});
process.removeListener("SIGINT", handleSigint);
throw err;
}
console.log();

Expand Down
Loading