Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,20 @@ The agent with the highest total score is recommended. Ties broken by the first

### Copeland Pairwise Scoring (alternative)

Enabled with `--scoring copeland`. Instead of assigning absolute point values, Copeland scoring compares every pair of agents head-to-head on three criteria:
Enabled with `--scoring copeland`. Instead of assigning absolute point values, Copeland scoring compares every pair of agents head-to-head on four criteria:

| Criterion | Better = |
|-----------|----------|
| Tests passed | Passed > Failed |
| Convergence group size | Larger group > Smaller group |
| Files changed | Fewer files > More files |
| Criterion | Better = | Notes |
|-----------|----------|-------|
| Tests passed | Passed > Failed | |
| Convergence group size | Larger group > Smaller group | |
| Non-test files changed | Fewer files > More files | Minimal code scope preferred |
| Test files added/modified | More files > Fewer files | Capped at 3; only counts when agent also changed non-test files |

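Test files are identified by the `*.test.*` or `*.spec.*` pattern in the file path (for example `foo.test.ts` or `foo.spec.js`); a file named `test.*` or `spec.*` directly inside a directory (for example `src/test.ts`) also matches.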

**Anti-gaming:** The test files criterion only applies when the agent also changed production (non-test) code. An agent that only adds test files without changing production code receives no test coverage bonus — this prevents gaming the score with empty test padding.

**Cap:** The effective test file count is `min(testFiles, 3)`. This means 1 test file < 2 < 3+, but 3 and 10 are treated equally — adequate coverage is rewarded, but excessive test files don't dominate.

For each pair (A, B):
1. Count how many criteria A wins vs B wins
Expand Down
112 changes: 106 additions & 6 deletions src/scoring/convergence.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,8 @@ describe("copelandRecommend", () => {
assert.equal(score.copelandTotal, 0, `Agent #${score.agentId} should have Copeland score 0`);
assert.equal(score.testsWins, 0);
assert.equal(score.convergenceWins, 0);
assert.equal(score.filesChangedWins, 0);
assert.equal(score.nonTestFilesWins, 0);
assert.equal(score.testFilesWins, 0);
}
// Still recommends someone (first agent)
assert.ok(result.recommended !== null);
Expand All @@ -317,9 +318,10 @@ describe("copelandRecommend", () => {
const convergence = analyzeConvergence(agents);
const result = copelandRecommend(agents, tests, convergence);

// Agent 1 vs Agent 2: tests(+1), convergence(-1), files(-1) → Agent 2 wins
// Agent 1 vs Agent 3: tests(+1), convergence(-1), files(tie) → tie
// Agent 2 vs Agent 3: tests(tie), convergence(tie), files(+1 for 2) → Agent 2 wins
// No test files in any agent, so testFiles criterion is always tied
// Agent 1 vs Agent 2: tests(+1), convergence(-1), scope(-1), testFiles(tie) → Agent 2 wins
// Agent 1 vs Agent 3: tests(+1), convergence(-1), scope(tie), testFiles(tie) → tie
// Agent 2 vs Agent 3: tests(tie), convergence(tie), scope(+1 for 2), testFiles(tie) → Agent 2 wins
// So Agent 2 should have the best Copeland score
assert.equal(result.recommended, 2);
});
Expand Down Expand Up @@ -375,8 +377,8 @@ describe("copelandRecommend", () => {
// Score1 wins tests and files, score2 wins neither
assert.equal(score1.testsWins, 1);
assert.equal(score2.testsWins, -1);
assert.equal(score1.filesChangedWins, 1);
assert.equal(score2.filesChangedWins, -1);
assert.equal(score1.nonTestFilesWins, 1);
assert.equal(score2.nonTestFilesWins, -1);
});

it("handles single agent", () => {
Expand All @@ -387,4 +389,102 @@ describe("copelandRecommend", () => {
assert.equal(result.scores.length, 1);
assert.equal(result.scores[0]!.copelandTotal, 0);
});

it("agent with tests beats agent without when other criteria tie", () => {
  // Both agents share the same diff and touch the same prod file ("a.ts");
  // only agent 1 additionally touches a test file.
  const withTestFile = makeAgent({ id: 1, diff: DIFF_A, filesChanged: ["a.ts", "a.test.ts"] });
  const withoutTestFile = makeAgent({ id: 2, diff: DIFF_A, filesChanged: ["a.ts"] });
  const agents = [withTestFile, withoutTestFile];

  // Both agents pass their tests, so the tests criterion cannot separate them.
  const tests = [
    { agentId: 1, passed: true },
    { agentId: 2, passed: true },
  ];

  const convergence = analyzeConvergence(agents);
  const result = copelandRecommend(agents, tests, convergence);

  // The test-files criterion is the only one that differs, and it favours agent 1.
  assert.equal(result.recommended, 1);
  const score1 = result.scores.find((entry) => entry.agentId === 1);
  assert.ok(score1);
  assert.ok(score1.testFilesWins > 0);
});

it("test-only changes do not get test file bonus", () => {
  // Agent 1 changes only test files (no prod code) — should not get testFiles bonus.
  // Agent 2 changes 1 prod file and no test files.
  const agents = [
    makeAgent({ id: 1, diff: DIFF_A, filesChanged: ["a.test.ts", "b.spec.ts"] }),
    makeAgent({ id: 2, diff: DIFF_B, filesChanged: ["x.ts"] }),
  ];
  const convergence = analyzeConvergence(agents);
  const result = copelandRecommend(agents, [], convergence);

  const score1 = result.scores.find((s) => s.agentId === 1);
  const score2 = result.scores.find((s) => s.agentId === 2);
  assert.ok(score1);
  assert.ok(score2);

  // Agent 1 has 0 effective test files (no prod changes), Agent 2 also has 0 test files,
  // so the testFiles criterion is a tie for both.
  assert.equal(score1.testFilesWins, 0);
  assert.equal(score2.testFilesWins, 0);

  // Agent 1 has 0 non-test files, Agent 2 has 1 — fewer is better, so Agent 1
  // wins the scope criterion. Asserting this pins the prod/test split itself:
  // the two test files must not be counted as non-test files for Agent 1.
  assert.equal(score1.nonTestFilesWins, 1);
  assert.equal(score2.nonTestFilesWins, -1);
});

it("test file cap prevents gaming with many test files", () => {
  // Every agent touches the same single prod file ("a.ts") with the same diff;
  // they differ only in how many test files accompany it:
  //   Agent 1 → 10 test files, Agent 2 → 2 test files, Agent 3 → 3 test files.
  // After capping at 3 the effective counts are 3, 2 and 3 respectively.
  const testFileNames = (stems: string[]): string[] => stems.map((stem) => `${stem}.test.ts`);

  const agents = [
    makeAgent({
      id: 1,
      diff: DIFF_A,
      filesChanged: ["a.ts", ...testFileNames(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"])],
    }),
    makeAgent({
      id: 2,
      diff: DIFF_A,
      filesChanged: ["a.ts", ...testFileNames(["a", "b"])],
    }),
    makeAgent({
      id: 3,
      diff: DIFF_A,
      filesChanged: ["a.ts", ...testFileNames(["a", "b", "c"])],
    }),
  ];
  const convergence = analyzeConvergence(agents);
  const result = copelandRecommend(agents, [], convergence);

  const scoreFor = (agentId: number) => result.scores.find((entry) => entry.agentId === agentId);

  const score1 = scoreFor(1);
  const score3 = scoreFor(3);
  assert.ok(score1);
  assert.ok(score3);

  // Agents 1 and 3 both end up with an effective count of 3, so they tie with
  // each other on the testFiles criterion and accumulate the same total for it.
  assert.equal(score1.testFilesWins, score3.testFilesWins);

  // Agent 2 (effective count 2) loses the testFiles criterion to both of them.
  const score2 = scoreFor(2);
  assert.ok(score2);
  assert.ok(score2.testFilesWins < score1.testFilesWins);
});
});
65 changes: 54 additions & 11 deletions src/scoring/convergence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,39 @@ export function recommend(
return { recommended: bestId, scores: agentScores };
}

/**
 * Matches a path that contains `test.` or `spec.` immediately preceded by
 * either `.` or `/` — e.g. `a.test.ts`, `b.spec.ts`, or `dir/test.ts`.
 */
const TEST_FILE_PATTERN = /[./](?:test|spec)\./;

/** Cap for test file criterion — prevents gaming with many test files */
const TEST_FILE_CAP = 3;

/**
 * Tally the given paths into test files (those matching TEST_FILE_PATTERN)
 * and everything else.
 *
 * @param files - Changed file paths to classify.
 * @returns The number of matching (test) paths and the number of remaining paths.
 */
function splitFilesByType(files: string[]): { testFiles: number; nonTestFiles: number } {
  const testFiles = files.filter((path) => TEST_FILE_PATTERN.test(path)).length;
  // Every path that is not a test file counts as a non-test file.
  return { testFiles, nonTestFiles: files.length - testFiles };
}

/**
 * Test file count as used for scoring.
 *
 * An agent that changed no non-test files gets 0 regardless of how many test
 * files it touched (prevents gaming); otherwise the count is limited to
 * TEST_FILE_CAP.
 *
 * @param testFiles - Number of test files the agent changed.
 * @param nonTestFiles - Number of non-test files the agent changed.
 * @returns The capped test file count, or 0 when no non-test files changed.
 */
function effectiveTestFiles(testFiles: number, nonTestFiles: number): number {
  const changedNonTestCode = nonTestFiles !== 0;
  return changedNonTestCode ? Math.min(testFiles, TEST_FILE_CAP) : 0;
}

/**
* Copeland pairwise scoring: compare every pair of agents head-to-head
* on three criteria (tests passed, convergence group size, files changed).
* on four criteria (tests passed, convergence group size, non-test files changed, test files).
* For each pair, the agent winning more criteria gets +1, the loser gets -1, ties get 0.
* The agent with the highest Copeland score is recommended.
*/
Expand All @@ -197,8 +227,9 @@ export function copelandRecommend(
const testsPassed = test?.passed ? 1 : 0;
const group = convergence.find((g) => g.agents.includes(agent.id));
const groupSize = group ? group.agents.length : 0;
const filesChanged = agent.filesChanged.length;
return { id: agent.id, testsPassed, groupSize, filesChanged };
const { testFiles, nonTestFiles } = splitFilesByType(agent.filesChanged);
const cappedTestFiles = effectiveTestFiles(testFiles, nonTestFiles);
return { id: agent.id, testsPassed, groupSize, nonTestFiles, cappedTestFiles };
});

// Initialize scores
Expand All @@ -208,7 +239,8 @@ export function copelandRecommend(
agentId: data.id,
testsWins: 0,
convergenceWins: 0,
filesChangedWins: 0,
nonTestFilesWins: 0,
testFilesWins: 0,
copelandTotal: 0,
});
}
Expand Down Expand Up @@ -244,15 +276,26 @@ export function copelandRecommend(
scoreMap.get(a.id)!.convergenceWins--;
}

// Criterion 3: files changed (fewer is better — minimal changes preferred)
if (a.filesChanged < b.filesChanged) {
// Criterion 3: non-test files changed (fewer is better — minimal code scope)
if (a.nonTestFiles < b.nonTestFiles) {
aWins++;
scoreMap.get(a.id)!.nonTestFilesWins++;
scoreMap.get(b.id)!.nonTestFilesWins--;
} else if (b.nonTestFiles < a.nonTestFiles) {
bWins++;
scoreMap.get(b.id)!.nonTestFilesWins++;
scoreMap.get(a.id)!.nonTestFilesWins--;
}

// Criterion 4: test files added/modified (more is better, capped, only with prod changes)
if (a.cappedTestFiles > b.cappedTestFiles) {
aWins++;
scoreMap.get(a.id)!.filesChangedWins++;
scoreMap.get(b.id)!.filesChangedWins--;
} else if (b.filesChanged < a.filesChanged) {
scoreMap.get(a.id)!.testFilesWins++;
scoreMap.get(b.id)!.testFilesWins--;
} else if (b.cappedTestFiles > a.cappedTestFiles) {
bWins++;
scoreMap.get(b.id)!.filesChangedWins++;
scoreMap.get(a.id)!.filesChangedWins--;
scoreMap.get(b.id)!.testFilesWins++;
scoreMap.get(a.id)!.testFilesWins--;
}

// Overall Copeland: winner of more criteria gets +1, loser -1
Expand Down
3 changes: 2 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ export interface CopelandScore {
agentId: number;
testsWins: number;
convergenceWins: number;
filesChangedWins: number;
nonTestFilesWins: number;
testFilesWins: number;
copelandTotal: number;
}

Expand Down
10 changes: 6 additions & 4 deletions src/utils/display.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,11 @@ export function displayResults(result: EnsembleResult): void {
padRight("Agent", 8) +
padRight("Tests", 10) +
padRight("Converge", 10) +
padRight("Files", 10) +
padRight("Scope", 10) +
padRight("TestCov", 10) +
padRight("Copeland", 10),
);
console.log(" " + pc.dim("─".repeat(48)));
console.log(" " + pc.dim("─".repeat(58)));

for (const score of result.copelandScores) {
const isRecommended = result.scoring === "copeland" && result.recommended === score.agentId;
Expand All @@ -121,7 +122,8 @@ export function displayResults(result: EnsembleResult): void {
padRight(`#${score.agentId}`, 8) +
padRight(fmt(score.testsWins), 10) +
padRight(fmt(score.convergenceWins), 10) +
padRight(fmt(score.filesChangedWins), 10) +
padRight(fmt(score.nonTestFilesWins), 10) +
padRight(fmt(score.testFilesWins), 10) +
padRight(fmt(score.copelandTotal), 10),
);
}
Expand All @@ -133,7 +135,7 @@ export function displayResults(result: EnsembleResult): void {
const method = result.scoring === "copeland" ? "Copeland pairwise" : "weighted";
console.log(
pc.cyan(` Recommended: Agent #${result.recommended}`) +
pc.dim(` (${method} scoring: tests + convergence + diff size)`),
pc.dim(` (${method} scoring: tests + convergence + scope + test coverage)`),
);
console.log();
}
Expand Down
Loading