From 00bfcbd8da68387ac1d2fb22df494093a5b4ab82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?=
 <pruiz@users.noreply.github.com>
Date: Mon, 15 Jun 2026 13:14:38 +0200
Subject: [PATCH 1/2] fix: propagate model/variant/thinking to sweep aggregate
 summary

The aggregate sweep rollup step (run_sweep_summary) was using raw
'opencode run' without resolving CodeCome's model configuration,
which meant CODECOME_MODEL, CODECOME_MODEL_VARIANT, OPENCODE_ARGS,
and codecome.yml agent pinning were all ignored for the aggregate
step while the per-file sweeps honoured them correctly.

Now resolve_runtime_config('auditor') is called and its result is passed
explicitly to the raw opencode run command: the --model and --variant
flags when a value is resolved, and --thinking when thinking is on,
matching the per-file sweep behaviour.

The banner now also prints the agent, the resolved model, variant, and
thinking state, plus the model and variant sources, for visibility.

Adds three regression tests covering model+variant, thinking-only,
and nothing-resolved scenarios.
---
 tests/test_run_sweep.py | 138 ++++++++++++++++++++++++++++++++++++++++
 tools/run-sweep.py      |  20 +++++-
 2 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/tests/test_run_sweep.py b/tests/test_run_sweep.py
index e84f068..edeb01c 100644
--- a/tests/test_run_sweep.py
+++ b/tests/test_run_sweep.py
@@ -223,3 +223,141 @@ def test_build_sweep_summary_prompt_raises_when_missing(self, tmp_path):
                 module.build_sweep_summary_prompt(["src/foo.php"], [])
         finally:
             module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md"
+
+
+class TestRunSweepSummaryModelPropagation:
+    """run_sweep_summary passes resolved model/variant/thinking to raw opencode run."""
+
+    def _setup_summary_env(self, module, tmp_path, monkeypatch):
+        """Configure temporary SWEEP_SUMMARY_PROMPT, TMP_DIR, and ROOT for a test.
+
+        Returns (orig_swp, orig_tmp_dir, orig_root) for cleanup in the caller.
+        """
+        real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8")
+        tmp_template = tmp_path / "phase-2-sweep-summary.md"
+        tmp_template.write_text(real_template, encoding="utf-8")
+        orig_swp = module.SWEEP_SUMMARY_PROMPT
+        module.SWEEP_SUMMARY_PROMPT = tmp_template
+        orig_tmp_dir = module.TMP_DIR
+        module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts"
+        orig_root = module.ROOT
+        module.ROOT = tmp_path
+        return orig_swp, orig_tmp_dir, orig_root
+
+    def _teardown_summary_env(self, module, orig_swp, orig_tmp_dir, orig_root):
+        module.SWEEP_SUMMARY_PROMPT = orig_swp
+        module.TMP_DIR = orig_tmp_dir
+        module.ROOT = orig_root
+
+    def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch):
+        """When model and variant are resolved, --model and --variant appear in the command."""
+        module = _load_run_sweep()
+        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch)
+
+        from codecome.config import RuntimeConfig
+
+        fake_rc = RuntimeConfig(
+            model="test-provider/test-model",
+            variant="test-variant",
+            model_source="env CODECOME_MODEL",
+            variant_source="env CODECOME_MODEL_VARIANT",
+            thinking_on=False,
+            thinking_source="env",
+        )
+        monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc)
+
+        captured_commands: list[list[str]] = []
+        def fake_run(command, **kwargs):
+            captured_commands.append(list(command))
+            return module.subprocess.CompletedProcess(command, 0)
+
+        monkeypatch.setattr(module.subprocess, "run", fake_run)
+
+        try:
+            code = module.run_sweep_summary(["src/a.php"], [])
+            assert code == 0
+            assert len(captured_commands) == 1
+            cmd = captured_commands[0]
+            assert "--model" in cmd
+            model_idx = cmd.index("--model")
+            assert cmd[model_idx + 1] == "test-provider/test-model"
+            assert "--variant" in cmd
+            variant_idx = cmd.index("--variant")
+            assert cmd[variant_idx + 1] == "test-variant"
+            assert "--thinking" not in cmd
+            assert cmd[0] == "opencode"
+            assert cmd[1] == "run"
+            assert "--agent" in cmd and cmd[cmd.index("--agent") + 1] == "auditor"
+        finally:
+            self._teardown_summary_env(module, orig_swp, orig_tmp_dir, orig_root)
+
+    def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch):
+        """When thinking is on, --thinking appears in the command."""
+        module = _load_run_sweep()
+        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch)
+
+        from codecome.config import RuntimeConfig
+
+        fake_rc = RuntimeConfig(
+            model=None,
+            variant=None,
+            model_source="(unknown)",
+            variant_source="(unknown)",
+            thinking_on=True,
+            thinking_source="provider-default",
+        )
+        monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc)
+
+        captured_commands: list[list[str]] = []
+        def fake_run(command, **kwargs):
+            captured_commands.append(list(command))
+            return module.subprocess.CompletedProcess(command, 0)
+
+        monkeypatch.setattr(module.subprocess, "run", fake_run)
+
+        try:
+            code = module.run_sweep_summary(["src/b.cs"], [])
+            assert code == 0
+            assert len(captured_commands) == 1
+            cmd = captured_commands[0]
+            assert "--thinking" in cmd
+            assert "--model" not in cmd
+            assert "--variant" not in cmd
+        finally:
+            self._teardown_summary_env(module, orig_swp, orig_tmp_dir, orig_root)
+
+    def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch):
+        """When model/variant are None and thinking is off, no extra flags are passed."""
+        module = _load_run_sweep()
+        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch)
+
+        from codecome.config import RuntimeConfig
+
+        fake_rc = RuntimeConfig(
+            model=None,
+            variant=None,
+            model_source="(unknown)",
+            variant_source="(unknown)",
+            thinking_on=False,
+            thinking_source="env",
+        )
+        monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc)
+
+        captured_commands: list[list[str]] = []
+        def fake_run(command, **kwargs):
+            captured_commands.append(list(command))
+            return module.subprocess.CompletedProcess(command, 0)
+
+        monkeypatch.setattr(module.subprocess, "run", fake_run)
+
+        try:
+            code = module.run_sweep_summary(["src/c.py"], [])
+            assert code == 0
+            assert len(captured_commands) == 1
+            cmd = captured_commands[0]
+            assert "--model" not in cmd
+            assert "--variant" not in cmd
+            assert "--thinking" not in cmd
+            assert cmd[:3] == ["opencode", "run", "--agent"]
+        finally:
+            self._teardown_summary_env(module, orig_swp, orig_tmp_dir, orig_root)
diff --git a/tools/run-sweep.py b/tools/run-sweep.py
index 8e0b0e3..f1f93a8 100755
--- a/tools/run-sweep.py
+++ b/tools/run-sweep.py
@@ -35,6 +35,7 @@
 sys.path.insert(0, str(Path(__file__).resolve().parent))
 
 import _colors as C
+from codecome.config import resolve_runtime_config
 
 ROOT = Path(__file__).resolve().parents[1]
 DEFAULT_INDEX = ROOT / "itemdb" / "notes" / "file-risk-index.yml"
@@ -197,13 +198,30 @@ def run_sweep_summary(files: list[str], per_file_summaries: list[str]) -> int:
     not a phase-mode run — it does not participate in the Phase 2
     completion gate and ``run-agent.py`` does not currently support
     non-phase utility prompts.
+
+    Model and variant resolution mirrors the per-file sweep runs so
+    that ``CODECOME_MODEL``, ``CODECOME_MODEL_VARIANT``, ``OPENCODE_ARGS``,
+    and ``codecome.yml`` agent pinning are all honoured for the aggregate
+    step.
     """
+    rc = resolve_runtime_config("auditor")
     prompt_path = build_sweep_summary_prompt(files, per_file_summaries)
     print(C.header("Sweep Summary (Aggregate Rollup)"))
     print(f"Prompt: {prompt_path.relative_to(ROOT)}")
+    print(f"  agent=auditor  model={rc.model or '(unknown)'}  "
+          f"variant={rc.variant or '(unknown)'}  "
+          f"thinking={'on' if rc.thinking_on else 'off'}"
+          f"  (model source: {rc.model_source}, variant source: {rc.variant_source})")
 
     prompt = prompt_path.read_text(encoding="utf-8")
-    command = ["opencode", "run", "--agent", "auditor", prompt]
+    command = ["opencode", "run", "--agent", "auditor"]
+    if rc.model:
+        command.extend(["--model", rc.model])
+    if rc.variant:
+        command.extend(["--variant", rc.variant])
+    if rc.thinking_on:
+        command.append("--thinking")
+    command.append(prompt)
     result = subprocess.run(command, cwd=ROOT)
     return int(result.returncode)
 

From bc4774ab691256caed4dde8119f7ff04a588edc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?=
 <pruiz@users.noreply.github.com>
Date: Mon, 15 Jun 2026 13:43:15 +0200
Subject: [PATCH 2/2] chore: address review feedback - drop dead monkeypatch
 param, assert agent role in resolver stubs

---
 tests/test_run_sweep.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/tests/test_run_sweep.py b/tests/test_run_sweep.py
index edeb01c..bc9dd1f 100644
--- a/tests/test_run_sweep.py
+++ b/tests/test_run_sweep.py
@@ -228,7 +228,7 @@ def test_build_sweep_summary_prompt_raises_when_missing(self, tmp_path):
 class TestRunSweepSummaryModelPropagation:
     """run_sweep_summary passes resolved model/variant/thinking to raw opencode run."""
 
-    def _setup_summary_env(self, module, tmp_path, monkeypatch):
+    def _setup_summary_env(self, module, tmp_path):
         """Configure temporary SWEEP_SUMMARY_PROMPT, TMP_DIR, and ROOT for a test.
 
         Returns (orig_swp, orig_tmp_dir, orig_root) for cleanup in the caller.
@@ -252,7 +252,7 @@ def _teardown_summary_env(self, module, orig_swp, orig_tmp_dir, orig_root):
     def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch):
         """When model and variant are resolved, --model and --variant appear in the command."""
         module = _load_run_sweep()
-        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch)
+        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path)
 
         from codecome.config import RuntimeConfig
 
@@ -264,7 +264,10 @@ def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch):
             thinking_on=False,
             thinking_source="env",
         )
-        monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc)
+        def fake_resolver(agent: str):
+            assert agent == "auditor", f"Expected auditor agent, got {agent!r}"
+            return fake_rc
+        monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver)
 
         captured_commands: list[list[str]] = []
         def fake_run(command, **kwargs):
@@ -294,7 +297,7 @@ def fake_run(command, **kwargs):
     def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch):
         """When thinking is on, --thinking appears in the command."""
         module = _load_run_sweep()
-        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch)
+        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path)
 
         from codecome.config import RuntimeConfig
 
@@ -306,7 +309,10 @@ def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch):
             thinking_on=True,
             thinking_source="provider-default",
         )
-        monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc)
+        def fake_resolver(agent: str):
+            assert agent == "auditor", f"Expected auditor agent, got {agent!r}"
+            return fake_rc
+        monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver)
 
         captured_commands: list[list[str]] = []
         def fake_run(command, **kwargs):
@@ -329,7 +335,7 @@ def fake_run(command, **kwargs):
     def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch):
         """When model/variant are None and thinking is off, no extra flags are passed."""
         module = _load_run_sweep()
-        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch)
+        orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path)
 
         from codecome.config import RuntimeConfig
 
@@ -341,7 +347,10 @@ def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch):
             thinking_on=False,
             thinking_source="env",
         )
-        monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc)
+        def fake_resolver(agent: str):
+            assert agent == "auditor", f"Expected auditor agent, got {agent!r}"
+            return fake_rc
+        monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver)
 
         captured_commands: list[list[str]] = []
         def fake_run(command, **kwargs):