From 93601f229085b37a4ceba470c51c94a922aa6079 Mon Sep 17 00:00:00 2001
From: unknown <that-github-user@github.com>
Date: Sat, 28 Mar 2026 18:04:47 -0700
Subject: [PATCH] Add test coverage as 4th Copeland criterion with anti-gaming
 safeguards
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split filesChanged into nonTestFiles (fewer=better) and testFiles
(more=better, capped at 3). Anti-gaming: test files only count when
agent also changed production code. Prevents score inflation via
empty test files.

4 criteria now: tests passed, convergence, code scope, test coverage.

Generated by thinktank Opus (5 agents, 4 pass, all 3 passing had
identical anti-gaming logic — strong consensus on the approach).

Closes #119

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/architecture.md            |  19 ++++--
 src/scoring/convergence.test.ts | 112 ++++++++++++++++++++++++++++++--
 src/scoring/convergence.ts      |  65 ++++++++++++++----
 src/types.ts                    |   3 +-
 src/utils/display.ts            |  10 +--
 5 files changed, 181 insertions(+), 28 deletions(-)

diff --git a/docs/architecture.md b/docs/architecture.md
index 05be7d5..b761c59 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -119,13 +119,20 @@ The agent with the highest total score is recommended. Ties broken by the first
 
 ### Copeland Pairwise Scoring (alternative)
 
-Enabled with `--scoring copeland`. Instead of assigning absolute point values, Copeland scoring compares every pair of agents head-to-head on three criteria:
+Enabled with `--scoring copeland`. Instead of assigning absolute point values, Copeland scoring compares every pair of agents head-to-head on four criteria:
 
-| Criterion | Better = |
-|-----------|----------|
-| Tests passed | Passed > Failed |
-| Convergence group size | Larger group > Smaller group |
-| Files changed | Fewer files > More files |
+| Criterion | Better = | Notes |
+|-----------|----------|-------|
+| Tests passed | Passed > Failed | |
+| Convergence group size | Larger group > Smaller group | |
+| Non-test files changed | Fewer files > More files | Minimal code scope preferred |
+| Test files added/modified | More files > Fewer files | Capped at 3; only counts when agent also changed non-test files |
+
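+Test files are identified by a `.test.` or `.spec.` segment in the file path (e.g. `a.test.ts`, `b.spec.ts`); a file named `test.*` or `spec.*` inside a directory (e.g. `src/test.ts`) matches as well.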
+
+**Anti-gaming:** The test files criterion only applies when the agent also changed production (non-test) code. An agent that only adds test files without changing production code receives no test coverage bonus — this prevents gaming the score with empty test padding.
+
+**Cap:** The effective test file count is `min(testFiles, 3)`. This means 1 test file < 2 < 3+, but 3 and 10 are treated equally — adequate coverage is rewarded, but excessive test files don't dominate.
 
 For each pair (A, B):
 1. Count how many criteria A wins vs B wins
diff --git a/src/scoring/convergence.test.ts b/src/scoring/convergence.test.ts
index b9a5c60..48bc699 100644
--- a/src/scoring/convergence.test.ts
+++ b/src/scoring/convergence.test.ts
@@ -294,7 +294,8 @@ describe("copelandRecommend", () => {
       assert.equal(score.copelandTotal, 0, `Agent #${score.agentId} should have Copeland score 0`);
       assert.equal(score.testsWins, 0);
       assert.equal(score.convergenceWins, 0);
-      assert.equal(score.filesChangedWins, 0);
+      assert.equal(score.nonTestFilesWins, 0);
+      assert.equal(score.testFilesWins, 0);
     }
     // Still recommends someone (first agent)
     assert.ok(result.recommended !== null);
@@ -317,9 +318,10 @@ describe("copelandRecommend", () => {
     const convergence = analyzeConvergence(agents);
     const result = copelandRecommend(agents, tests, convergence);
 
-    // Agent 1 vs Agent 2: tests(+1), convergence(-1), files(-1) → Agent 2 wins
-    // Agent 1 vs Agent 3: tests(+1), convergence(-1), files(tie) → tie
-    // Agent 2 vs Agent 3: tests(tie), convergence(tie), files(+1 for 2) → Agent 2 wins
+    // No test files in any agent, so testFiles criterion is always tied
+    // Agent 1 vs Agent 2: tests(+1), convergence(-1), scope(-1), testFiles(tie) → Agent 2 wins
+    // Agent 1 vs Agent 3: tests(+1), convergence(-1), scope(tie), testFiles(tie) → tie
+    // Agent 2 vs Agent 3: tests(tie), convergence(tie), scope(+1 for 2), testFiles(tie) → Agent 2 wins
     // So Agent 2 should have the best Copeland score
     assert.equal(result.recommended, 2);
   });
@@ -375,8 +377,8 @@ describe("copelandRecommend", () => {
     // Score1 wins tests and files, score2 wins neither
     assert.equal(score1.testsWins, 1);
     assert.equal(score2.testsWins, -1);
-    assert.equal(score1.filesChangedWins, 1);
-    assert.equal(score2.filesChangedWins, -1);
+    assert.equal(score1.nonTestFilesWins, 1);
+    assert.equal(score2.nonTestFilesWins, -1);
   });
 
   it("handles single agent", () => {
@@ -387,4 +389,102 @@ describe("copelandRecommend", () => {
     assert.equal(result.scores.length, 1);
     assert.equal(result.scores[0]!.copelandTotal, 0);
   });
+
+  it("agent with tests beats agent without when other criteria tie", () => {
+    // Both agents change the same prod file with the same diff; only Agent 1 also adds a test file
+    const agents = [
+      makeAgent({ id: 1, diff: DIFF_A, filesChanged: ["a.ts", "a.test.ts"] }),
+      makeAgent({ id: 2, diff: DIFF_A, filesChanged: ["a.ts"] }),
+    ];
+    const tests = [
+      { agentId: 1, passed: true },
+      { agentId: 2, passed: true },
+    ];
+    const convergence = analyzeConvergence(agents);
+    const result = copelandRecommend(agents, tests, convergence);
+
+    // Agent 1 wins testFiles criterion (+1 vs 0), ties everything else
+    assert.equal(result.recommended, 1);
+    const score1 = result.scores.find((s) => s.agentId === 1);
+    assert.ok(score1);
+    assert.ok(score1.testFilesWins > 0);
+  });
+
+  it("test-only changes do not get test file bonus", () => {
+    // Agent 1 changes only test files (no prod code) — should not get testFiles bonus
+    // Agent 2 changes 1 prod file
+    const agents = [
+      makeAgent({ id: 1, diff: DIFF_A, filesChanged: ["a.test.ts", "b.spec.ts"] }),
+      makeAgent({ id: 2, diff: DIFF_B, filesChanged: ["x.ts"] }),
+    ];
+    const convergence = analyzeConvergence(agents);
+    const result = copelandRecommend(agents, [], convergence);
+
+    const score1 = result.scores.find((s) => s.agentId === 1);
+    const score2 = result.scores.find((s) => s.agentId === 2);
+    assert.ok(score1);
+    assert.ok(score2);
+
+    // Agent 1 has 0 effective test files (no prod changes), Agent 2 also has 0 test files
+    // So testFilesWins should be 0 for both
+    assert.equal(score1.testFilesWins, 0);
+    assert.equal(score2.testFilesWins, 0);
+
+    // Agent 1 has 0 nonTestFiles, Agent 2 has 1 — but fewer is better,
+    // so Agent 1 wins scope. However Agent 2 is not disadvantaged on testFiles.
+  });
+
+  it("test file cap prevents gaming with many test files", () => {
+    // Agent 1: 1 prod file + 10 test files
+    // Agent 2: 1 prod file + 2 test files
+    // Agent 3: 1 prod file + 3 test files
+    // After capping at 3: Agent 1 effective=3, Agent 2 effective=2, Agent 3 effective=3
+    const agents = [
+      makeAgent({
+        id: 1,
+        diff: DIFF_A,
+        filesChanged: [
+          "a.ts",
+          "a.test.ts",
+          "b.test.ts",
+          "c.test.ts",
+          "d.test.ts",
+          "e.test.ts",
+          "f.test.ts",
+          "g.test.ts",
+          "h.test.ts",
+          "i.test.ts",
+          "j.test.ts",
+        ],
+      }),
+      makeAgent({
+        id: 2,
+        diff: DIFF_A,
+        filesChanged: ["a.ts", "a.test.ts", "b.test.ts"],
+      }),
+      makeAgent({
+        id: 3,
+        diff: DIFF_A,
+        filesChanged: ["a.ts", "a.test.ts", "b.test.ts", "c.test.ts"],
+      }),
+    ];
+    const convergence = analyzeConvergence(agents);
+    const result = copelandRecommend(agents, [], convergence);
+
+    const score1 = result.scores.find((s) => s.agentId === 1);
+    const score3 = result.scores.find((s) => s.agentId === 3);
+    assert.ok(score1);
+    assert.ok(score3);
+
+    // Agent 1 (10 test files capped to 3) and Agent 3 (3 test files capped to 3)
+    // should tie on testFiles criterion
+    // Agent 1 vs Agent 3: testFilesWins contribution should be 0 (tie)
+    // In pairwise: A1 effective=3 vs A3 effective=3 → tie on testFiles
+    assert.equal(score1.testFilesWins, score3.testFilesWins);
+
+    // Agent 2 (2 test files) should lose to both Agent 1 and Agent 3 on testFiles
+    const score2 = result.scores.find((s) => s.agentId === 2);
+    assert.ok(score2);
+    assert.ok(score2.testFilesWins < score1.testFilesWins);
+  });
 });
diff --git a/src/scoring/convergence.ts b/src/scoring/convergence.ts
index 8351c49..e7cdae9 100644
--- a/src/scoring/convergence.ts
+++ b/src/scoring/convergence.ts
@@ -177,9 +177,39 @@ export function recommend(
   return { recommended: bestId, scores: agentScores };
 }
 
+const TEST_FILE_PATTERN = /[./](?:test|spec)\./;
+
+/** Upper bound on the test-file count credited in scoring; counts above it are clamped to this value. */
+const TEST_FILE_CAP = 3;
+
+/**
+ * Tally `files` into `{ testFiles, nonTestFiles }`: a path matching TEST_FILE_PATTERN counts as a test file, any other path as non-test.
+ */
+function splitFilesByType(files: string[]): { testFiles: number; nonTestFiles: number } {
+  let testFiles = 0;
+  let nonTestFiles = 0;
+  for (const f of files) {
+    if (TEST_FILE_PATTERN.test(f)) {
+      testFiles++;
+    } else {
+      nonTestFiles++;
+    }
+  }
+  return { testFiles, nonTestFiles };
+}
+
+/**
+ * Test-file count used for scoring. Returns 0 when `nonTestFiles` is 0, so a change set made
+ * only of test files earns no credit; otherwise returns `testFiles` clamped to TEST_FILE_CAP.
+ */
+function effectiveTestFiles(testFiles: number, nonTestFiles: number): number {
+  if (nonTestFiles === 0) return 0;
+  return Math.min(testFiles, TEST_FILE_CAP);
+}
+
 /**
  * Copeland pairwise scoring: compare every pair of agents head-to-head
- * on three criteria (tests passed, convergence group size, files changed).
+ * on four criteria (tests passed, convergence group size, non-test files changed, test files).
  * For each pair, the agent winning more criteria gets +1, the loser gets -1, ties get 0.
  * The agent with the highest Copeland score is recommended.
  */
@@ -197,8 +227,9 @@ export function copelandRecommend(
     const testsPassed = test?.passed ? 1 : 0;
     const group = convergence.find((g) => g.agents.includes(agent.id));
     const groupSize = group ? group.agents.length : 0;
-    const filesChanged = agent.filesChanged.length;
-    return { id: agent.id, testsPassed, groupSize, filesChanged };
+    const { testFiles, nonTestFiles } = splitFilesByType(agent.filesChanged);
+    const cappedTestFiles = effectiveTestFiles(testFiles, nonTestFiles);
+    return { id: agent.id, testsPassed, groupSize, nonTestFiles, cappedTestFiles };
   });
 
   // Initialize scores
@@ -208,7 +239,8 @@ export function copelandRecommend(
       agentId: data.id,
       testsWins: 0,
       convergenceWins: 0,
-      filesChangedWins: 0,
+      nonTestFilesWins: 0,
+      testFilesWins: 0,
       copelandTotal: 0,
     });
   }
@@ -244,15 +276,26 @@ export function copelandRecommend(
         scoreMap.get(a.id)!.convergenceWins--;
       }
 
-      // Criterion 3: files changed (fewer is better — minimal changes preferred)
-      if (a.filesChanged < b.filesChanged) {
+      // Criterion 3: non-test files changed (fewer is better — minimal code scope)
+      if (a.nonTestFiles < b.nonTestFiles) {
+        aWins++;
+        scoreMap.get(a.id)!.nonTestFilesWins++;
+        scoreMap.get(b.id)!.nonTestFilesWins--;
+      } else if (b.nonTestFiles < a.nonTestFiles) {
+        bWins++;
+        scoreMap.get(b.id)!.nonTestFilesWins++;
+        scoreMap.get(a.id)!.nonTestFilesWins--;
+      }
+
+      // Criterion 4: test files added/modified (more is better, capped, only with prod changes)
+      if (a.cappedTestFiles > b.cappedTestFiles) {
         aWins++;
-        scoreMap.get(a.id)!.filesChangedWins++;
-        scoreMap.get(b.id)!.filesChangedWins--;
-      } else if (b.filesChanged < a.filesChanged) {
+        scoreMap.get(a.id)!.testFilesWins++;
+        scoreMap.get(b.id)!.testFilesWins--;
+      } else if (b.cappedTestFiles > a.cappedTestFiles) {
         bWins++;
-        scoreMap.get(b.id)!.filesChangedWins++;
-        scoreMap.get(a.id)!.filesChangedWins--;
+        scoreMap.get(b.id)!.testFilesWins++;
+        scoreMap.get(a.id)!.testFilesWins--;
       }
 
       // Overall Copeland: winner of more criteria gets +1, loser -1
diff --git a/src/types.ts b/src/types.ts
index ded3549..a9c2f6b 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -53,7 +53,8 @@ export interface CopelandScore {
   agentId: number;
   testsWins: number;
   convergenceWins: number;
-  filesChangedWins: number;
+  nonTestFilesWins: number;
+  testFilesWins: number;
   copelandTotal: number;
 }
 
diff --git a/src/utils/display.ts b/src/utils/display.ts
index 6f6c6fa..c1e9faa 100644
--- a/src/utils/display.ts
+++ b/src/utils/display.ts
@@ -107,10 +107,11 @@ export function displayResults(result: EnsembleResult): void {
         padRight("Agent", 8) +
         padRight("Tests", 10) +
         padRight("Converge", 10) +
-        padRight("Files", 10) +
+        padRight("Scope", 10) +
+        padRight("TestCov", 10) +
         padRight("Copeland", 10),
     );
-    console.log("  " + pc.dim("─".repeat(48)));
+    console.log("  " + pc.dim("─".repeat(58)));
 
     for (const score of result.copelandScores) {
       const isRecommended = result.scoring === "copeland" && result.recommended === score.agentId;
@@ -121,7 +122,8 @@ export function displayResults(result: EnsembleResult): void {
           padRight(`#${score.agentId}`, 8) +
           padRight(fmt(score.testsWins), 10) +
           padRight(fmt(score.convergenceWins), 10) +
-          padRight(fmt(score.filesChangedWins), 10) +
+          padRight(fmt(score.nonTestFilesWins), 10) +
+          padRight(fmt(score.testFilesWins), 10) +
           padRight(fmt(score.copelandTotal), 10),
       );
     }
@@ -133,7 +135,7 @@ export function displayResults(result: EnsembleResult): void {
     const method = result.scoring === "copeland" ? "Copeland pairwise" : "weighted";
     console.log(
       pc.cyan(`  Recommended: Agent #${result.recommended}`) +
-        pc.dim(` (${method} scoring: tests + convergence + diff size)`),
+        pc.dim(` (${method} scoring: tests + convergence + scope + test coverage)`),
     );
     console.log();
   }