diff --git a/src/cli.ts b/src/cli.ts index ceb5298..b5f9ec5 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -125,9 +125,15 @@ program program .command("list") - .description("List results from the most recent ensemble run") - .action(async () => { - await list(); + .description("List all past runs, or show details for a specific run") + .argument("[run-number]", "Run number to show details for") + .action(async (runNumberArg?: string) => { + const runNumber = runNumberArg ? parseInt(runNumberArg, 10) : undefined; + if (runNumberArg && (Number.isNaN(runNumber) || (runNumber as number) < 1)) { + console.error("Error: run number must be a positive integer"); + process.exit(1); + } + await list(runNumber); }); program diff --git a/src/commands/list.test.ts b/src/commands/list.test.ts new file mode 100644 index 0000000..f0b9dae --- /dev/null +++ b/src/commands/list.test.ts @@ -0,0 +1,96 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import type { EnsembleResult } from "../types.js"; +import { buildRunSummary, extractRunNumber } from "./list.js"; + +describe("extractRunNumber", () => { + it("returns -1 (run numbers assigned by load order, not filename)", () => { + assert.equal(extractRunNumber("run-2026-03-28T05-58-48-564Z.json"), -1); + assert.equal(extractRunNumber("latest.json"), -1); + }); +}); + +function makeResult(overrides: Partial<EnsembleResult> = {}): EnsembleResult { + return { + prompt: "test prompt", + model: "sonnet", + timestamp: "2026-03-28T10:00:00Z", + scoring: "copeland", + agents: [ + { + id: 1, + worktree: "/tmp/w1", + status: "success", + exitCode: 0, + duration: 5000, + output: "", + diff: "diff", + filesChanged: ["a.ts"], + linesAdded: 10, + linesRemoved: 2, + }, + { + id: 2, + worktree: "/tmp/w2", + status: "success", + exitCode: 0, + duration: 8000, + output: "", + diff: "diff", + filesChanged: ["a.ts", "b.ts"], + linesAdded: 20, + linesRemoved: 5, + }, + ], + tests: [ + { agentId: 1, passed: true, output: "", exitCode: 0 
}, + { agentId: 2, passed: false, output: "", exitCode: 1 }, + ], + convergence: [ + { agents: [1, 2], similarity: 0.8, filesChanged: ["a.ts"], description: "similar" }, + ], + recommended: 1, + scores: [], + ...overrides, + }; +} + +describe("buildRunSummary", () => { + it("builds summary with test pass rate and convergence", () => { + const result = makeResult(); + const summary = buildRunSummary(3, result); + + assert.equal(summary.runNumber, 3); + assert.equal(summary.timestamp, "2026-03-28T10:00:00Z"); + assert.equal(summary.agentCount, 2); + assert.equal(summary.recommended, 1); + assert.equal(summary.testPassRate, 0.5); + assert.equal(summary.avgConvergence, 0.8); + }); + + it("returns null test pass rate when no tests", () => { + const result = makeResult({ tests: [] }); + const summary = buildRunSummary(1, result); + + assert.equal(summary.testPassRate, null); + }); + + it("returns null convergence when no convergence groups", () => { + const result = makeResult({ convergence: [] }); + const summary = buildRunSummary(1, result); + + assert.equal(summary.avgConvergence, null); + }); + + it("averages multiple convergence groups", () => { + const result = makeResult({ + convergence: [ + { agents: [1, 2], similarity: 0.6, filesChanged: ["a.ts"], description: "a" }, + { agents: [1, 2], similarity: 1.0, filesChanged: ["b.ts"], description: "b" }, + ], + }); + const summary = buildRunSummary(5, result); + + assert.equal(summary.avgConvergence, 0.8); + }); +}); diff --git a/src/commands/list.ts b/src/commands/list.ts index ee6ef6e..eb31dc8 100644 --- a/src/commands/list.ts +++ b/src/commands/list.ts @@ -1,13 +1,119 @@ -import { readFile } from "node:fs/promises"; +import { readdir, readFile } from "node:fs/promises"; import { join } from "node:path"; +import pc from "picocolors"; import type { EnsembleResult } from "../types.js"; -import { displayResults } from "../utils/display.js"; +import { displayResults, padRight } from "../utils/display.js"; -export async 
function list(): Promise<void> { +export interface RunSummary { + runNumber: number; + timestamp: string; + agentCount: number; + recommended: number | null; + testPassRate: number | null; + avgConvergence: number | null; +} + +/** Run numbers are assigned by sort order (chronological), not embedded in filename */ +export function extractRunNumber(_filename: string): number { + return -1; // Assigned by loadAllRuns based on sort position +} + +export function buildRunSummary(runNumber: number, result: EnsembleResult): RunSummary { + const testPassRate = + result.tests.length > 0 + ? result.tests.filter((t) => t.passed).length / result.tests.length + : null; + + const similarities = result.convergence.map((g) => g.similarity); + const avgConvergence = + similarities.length > 0 + ? similarities.reduce((sum, s) => sum + s, 0) / similarities.length + : null; + + return { + runNumber, + timestamp: result.timestamp, + agentCount: result.agents.length, + recommended: result.recommended, + testPassRate, + avgConvergence, + }; +} + +export async function loadAllRuns(): Promise<{ filename: string; result: EnsembleResult }[]> { + const entries = await readdir(".thinktank"); + const files = entries.filter((f) => f.startsWith("run-") && f.endsWith(".json")).sort(); // Lexicographic sort = chronological for ISO timestamps + + const runs: { filename: string; result: EnsembleResult }[] = []; + for (const file of files) { + try { + const raw = await readFile(join(".thinktank", file), "utf-8"); + runs.push({ filename: file, result: JSON.parse(raw) as EnsembleResult }); + } catch { + // skip malformed files + } + } + return runs; +} + +function displayRunTable(summaries: RunSummary[]): void { + console.log(); + console.log(pc.bold("Run History")); + console.log(pc.dim("─".repeat(72))); + console.log( + " " + + padRight("Run", 6) + + padRight("Timestamp", 22) + + padRight("Agents", 8) + + padRight("Best", 6) + + padRight("Tests", 10) + + padRight("Convergence", 12), + ); + console.log(` 
${pc.dim("─".repeat(64))}`); + + for (const s of summaries) { + const testStr = + s.testPassRate !== null ? `${Math.round(s.testPassRate * 100)}%` : pc.dim("n/a"); + const convStr = + s.avgConvergence !== null ? `${Math.round(s.avgConvergence * 100)}%` : pc.dim("n/a"); + const bestStr = s.recommended !== null ? `#${s.recommended}` : pc.dim("n/a"); + + console.log( + " " + + padRight(`${s.runNumber}`, 6) + + padRight(s.timestamp, 22) + + padRight(String(s.agentCount), 8) + + padRight(bestStr, 6) + + padRight(testStr, 10) + + padRight(convStr, 12), + ); + } + + console.log(); + console.log(pc.dim(" View details: thinktank list <run-number>")); + console.log(); +} + +export async function list(runNumber?: number): Promise<void> { try { - const raw = await readFile(join(".thinktank", "latest.json"), "utf-8"); - const result: EnsembleResult = JSON.parse(raw); - displayResults(result); + const runs = await loadAllRuns(); + + if (runs.length === 0) { + console.log(" No results found. Run `thinktank run` first."); + return; + } + + if (runNumber !== undefined) { + if (runNumber < 1 || runNumber > runs.length) { + console.log(` Run #${runNumber} not found. Valid range: 1-${runs.length}`); + return; + } + displayResults(runs[runNumber - 1]!.result); + return; + } + + const summaries = runs.map((r, i) => buildRunSummary(i + 1, r.result)); + displayRunTable(summaries); } catch { console.log(" No results found. Run `thinktank run` first."); }