From 00bfcbd8da68387ac1d2fb22df494093a5b4ab82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 15 Jun 2026 13:14:38 +0200 Subject: [PATCH 1/2] fix: propagate model/variant/thinking to sweep aggregate summary The aggregate sweep rollup step (run_sweep_summary) was using raw 'opencode run' without resolving CodeCome's model configuration, which meant CODECOME_MODEL, CODECOME_MODEL_VARIANT, OPENCODE_ARGS, and codecome.yml agent pinning were all ignored for the aggregate step while the per-file sweeps honoured them correctly. Now resolve_runtime_config('auditor') is called and the resolved model, variant, and thinking flags are passed explicitly to the raw opencode run command, matching the per-file sweep behaviour. Also prints the resolution source in the banner for visibility. Adds three regression tests covering model+variant, thinking-only, and nothing-resolved scenarios. --- tests/test_run_sweep.py | 138 ++++++++++++++++++++++++++++++++++++++++ tools/run-sweep.py | 20 +++++- 2 files changed, 157 insertions(+), 1 deletion(-) diff --git a/tests/test_run_sweep.py b/tests/test_run_sweep.py index e84f068..edeb01c 100644 --- a/tests/test_run_sweep.py +++ b/tests/test_run_sweep.py @@ -223,3 +223,141 @@ def test_build_sweep_summary_prompt_raises_when_missing(self, tmp_path): module.build_sweep_summary_prompt(["src/foo.php"], []) finally: module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" + + +class TestRunSweepSummaryModelPropagation: + """run_sweep_summary passes resolved model/variant/thinking to raw opencode run.""" + + def _setup_summary_env(self, module, tmp_path, monkeypatch): + """Configure temporary SWEEP_SUMMARY_PROMPT, TMP_DIR, and ROOT for a test. + + Returns (orig_swp, orig_tmp_dir, orig_root) for cleanup in the caller. + """ + real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8") + tmp_template = tmp_path / "phase-2-sweep-summary.md" + tmp_template.write_text(real_template, encoding="utf-8") + orig_swp = module.SWEEP_SUMMARY_PROMPT + module.SWEEP_SUMMARY_PROMPT = tmp_template + orig_tmp_dir = module.TMP_DIR + module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" + orig_root = module.ROOT + module.ROOT = tmp_path + return orig_swp, orig_tmp_dir, orig_root + + def _teardown_summary_env(self, module, orig_swp, orig_tmp_dir, orig_root): + module.SWEEP_SUMMARY_PROMPT = orig_swp + module.TMP_DIR = orig_tmp_dir + module.ROOT = orig_root + + def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch): + """When model and variant are resolved, --model and --variant appear in the command.""" + module = _load_run_sweep() + orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch) + + from codecome.config import RuntimeConfig + + fake_rc = RuntimeConfig( + model="test-provider/test-model", + variant="test-variant", + model_source="env CODECOME_MODEL", + variant_source="env CODECOME_MODEL_VARIANT", + thinking_on=False, + thinking_source="env", + ) + monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc) + + captured_commands: list[list[str]] = [] + def fake_run(command, **kwargs): + captured_commands.append(list(command)) + return module.subprocess.CompletedProcess(command, 0) + + monkeypatch.setattr(module.subprocess, "run", fake_run) + + try: + code = module.run_sweep_summary(["src/a.php"], []) + assert code == 0 + assert len(captured_commands) == 1 + cmd = captured_commands[0] + assert "--model" in cmd + model_idx = cmd.index("--model") + assert cmd[model_idx + 1] == "test-provider/test-model" + assert "--variant" in cmd + variant_idx = cmd.index("--variant") + assert cmd[variant_idx + 1] == "test-variant" + assert "--thinking" not in cmd + assert cmd[0] == "opencode" + assert cmd[1] == "run" + assert "--agent" in cmd and cmd[cmd.index("--agent") + 1] == "auditor" + finally: + self._teardown_summary_env(module, orig_swp, orig_tmp_dir, orig_root) + + def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch): + """When thinking is on, --thinking appears in the command.""" + module = _load_run_sweep() + orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch) + + from codecome.config import RuntimeConfig + + fake_rc = RuntimeConfig( + model=None, + variant=None, + model_source="(unknown)", + variant_source="(unknown)", + thinking_on=True, + thinking_source="provider-default", + ) + monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc) + + captured_commands: list[list[str]] = [] + def fake_run(command, **kwargs): + captured_commands.append(list(command)) + return module.subprocess.CompletedProcess(command, 0) + + monkeypatch.setattr(module.subprocess, "run", fake_run) + + try: + code = module.run_sweep_summary(["src/b.cs"], []) + assert code == 0 + assert len(captured_commands) == 1 + cmd = captured_commands[0] + assert "--thinking" in cmd + assert "--model" not in cmd + assert "--variant" not in cmd + finally: + self._teardown_summary_env(module, orig_swp, orig_tmp_dir, orig_root) + + def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch): + """When model/variant are None and thinking is off, no extra flags are passed.""" + module = _load_run_sweep() + orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch) + + from codecome.config import RuntimeConfig + + fake_rc = RuntimeConfig( + model=None, + variant=None, + model_source="(unknown)", + variant_source="(unknown)", + thinking_on=False, + thinking_source="env", + ) + monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc) + + captured_commands: list[list[str]] = [] + def fake_run(command, **kwargs): + captured_commands.append(list(command)) + return module.subprocess.CompletedProcess(command, 0) + + monkeypatch.setattr(module.subprocess, "run", fake_run) + + try: + code = module.run_sweep_summary(["src/c.py"], []) + assert code == 0 + assert len(captured_commands) == 1 + cmd = captured_commands[0] + assert "--model" not in cmd + assert "--variant" not in cmd + assert "--thinking" not in cmd + assert cmd[:3] == ["opencode", "run", "--agent"] + finally: + self._teardown_summary_env(module, orig_swp, orig_tmp_dir, orig_root) diff --git a/tools/run-sweep.py b/tools/run-sweep.py index 8e0b0e3..f1f93a8 100755 --- a/tools/run-sweep.py +++ b/tools/run-sweep.py @@ -35,6 +35,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parent)) import _colors as C +from codecome.config import resolve_runtime_config ROOT = Path(__file__).resolve().parents[1] DEFAULT_INDEX = ROOT / "itemdb" / "notes" / "file-risk-index.yml" @@ -197,13 +198,30 @@ def run_sweep_summary(files: list[str], per_file_summaries: list[str]) -> int: not a phase-mode run — it does not participate in the Phase 2 completion gate and ``run-agent.py`` does not currently support non-phase utility prompts. + + Model and variant resolution mirrors the per-file sweep runs so + that ``CODECOME_MODEL``, ``CODECOME_MODEL_VARIANT``, ``OPENCODE_ARGS``, + and ``codecome.yml`` agent pinning are all honoured for the aggregate + step. """ + rc = resolve_runtime_config("auditor") prompt_path = build_sweep_summary_prompt(files, per_file_summaries) print(C.header("Sweep Summary (Aggregate Rollup)")) print(f"Prompt: {prompt_path.relative_to(ROOT)}") + print(f" agent=auditor model={rc.model or '(unknown)'} " + f"variant={rc.variant or '(unknown)'} " + f"thinking={'on' if rc.thinking_on else 'off'}" + f" (model source: {rc.model_source}, variant source: {rc.variant_source})") prompt = prompt_path.read_text(encoding="utf-8") - command = ["opencode", "run", "--agent", "auditor", prompt] + command = ["opencode", "run", "--agent", "auditor"] + if rc.model: + command.extend(["--model", rc.model]) + if rc.variant: + command.extend(["--variant", rc.variant]) + if rc.thinking_on: + command.append("--thinking") + command.append(prompt) result = subprocess.run(command, cwd=ROOT) return int(result.returncode) From bc4774ab691256caed4dde8119f7ff04a588edc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 15 Jun 2026 13:43:15 +0200 Subject: [PATCH 2/2] chore: address review feedback - drop dead monkeypatch param, assert agent role in resolver stubs --- tests/test_run_sweep.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tests/test_run_sweep.py b/tests/test_run_sweep.py index edeb01c..bc9dd1f 100644 --- a/tests/test_run_sweep.py +++ b/tests/test_run_sweep.py @@ -228,7 +228,7 @@ def test_build_sweep_summary_prompt_raises_when_missing(self, tmp_path): class TestRunSweepSummaryModelPropagation: """run_sweep_summary passes resolved model/variant/thinking to raw opencode run.""" - def _setup_summary_env(self, module, tmp_path, monkeypatch): + def _setup_summary_env(self, module, tmp_path): """Configure temporary SWEEP_SUMMARY_PROMPT, TMP_DIR, and ROOT for a test. Returns (orig_swp, orig_tmp_dir, orig_root) for cleanup in the caller. @@ -252,7 +252,7 @@ def _teardown_summary_env(self, module, orig_swp, orig_tmp_dir, orig_root): def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch): """When model and variant are resolved, --model and --variant appear in the command.""" module = _load_run_sweep() - orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch) + orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path) from codecome.config import RuntimeConfig @@ -264,7 +264,10 @@ def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch): thinking_on=False, thinking_source="env", ) - monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc) + def fake_resolver(agent: str): + assert agent == "auditor", f"Expected auditor agent, got {agent!r}" + return fake_rc + monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver) captured_commands: list[list[str]] = [] def fake_run(command, **kwargs): @@ -294,7 +297,7 @@ def fake_run(command, **kwargs): def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch): """When thinking is on, --thinking appears in the command.""" module = _load_run_sweep() - orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch) + orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path) from codecome.config import RuntimeConfig @@ -306,7 +309,10 @@ def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch): thinking_on=True, thinking_source="provider-default", ) - monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc) + def fake_resolver(agent: str): + assert agent == "auditor", f"Expected auditor agent, got {agent!r}" + return fake_rc + monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver) captured_commands: list[list[str]] = [] def fake_run(command, **kwargs): @@ -329,7 +335,7 @@ def fake_run(command, **kwargs): def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch): """When model/variant are None and thinking is off, no extra flags are passed.""" module = _load_run_sweep() - orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path, monkeypatch) + orig_swp, orig_tmp_dir, orig_root = self._setup_summary_env(module, tmp_path) from codecome.config import RuntimeConfig @@ -341,7 +347,10 @@ def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch): thinking_on=False, thinking_source="env", ) - monkeypatch.setattr(module, "resolve_runtime_config", lambda agent: fake_rc) + def fake_resolver(agent: str): + assert agent == "auditor", f"Expected auditor agent, got {agent!r}" + return fake_rc + monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver) captured_commands: list[list[str]] = [] def fake_run(command, **kwargs):