class TestRunSweepSummaryModelPropagation:
    """run_sweep_summary passes resolved model/variant/thinking to raw opencode run.

    Every test stubs ``resolve_runtime_config`` and ``subprocess.run`` on the
    loaded run-sweep module, calls ``run_sweep_summary`` and inspects the
    command list handed to the stubbed ``subprocess.run``.

    All patching goes through the ``monkeypatch`` fixture so the module
    globals are restored even when a test fails before reaching its
    assertions (no manual save/restore bookkeeping is needed).
    """

    @staticmethod
    def _make_runtime_config(**overrides):
        """Build a ``RuntimeConfig`` with "nothing resolved" defaults.

        Keyword arguments override individual fields; the defaults are a
        config with no model, no variant and thinking switched off.
        """
        # Imported lazily, as the tests did originally, so collecting this
        # module does not require codecome.config to be importable.
        from codecome.config import RuntimeConfig

        fields = {
            "model": None,
            "variant": None,
            "model_source": "(unknown)",
            "variant_source": "(unknown)",
            "thinking_on": False,
            "thinking_source": "env",
        }
        fields.update(overrides)
        return RuntimeConfig(**fields)

    def _patch_summary_env(self, module, tmp_path, monkeypatch):
        """Point SWEEP_SUMMARY_PROMPT, TMP_DIR and ROOT at ``tmp_path``.

        The real summary template is copied into ``tmp_path`` so the prompt
        builder still has a template to read. ``monkeypatch`` restores the
        three module globals automatically at test teardown.
        """
        real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8")
        tmp_template = tmp_path / "phase-2-sweep-summary.md"
        tmp_template.write_text(real_template, encoding="utf-8")

        monkeypatch.setattr(module, "SWEEP_SUMMARY_PROMPT", tmp_template)
        monkeypatch.setattr(
            module, "TMP_DIR", tmp_path / "tmp" / "file-sweep-prompts"
        )
        monkeypatch.setattr(module, "ROOT", tmp_path)

    def _run_summary(self, module, tmp_path, monkeypatch, runtime_config, files):
        """Run ``module.run_sweep_summary(files, [])`` with all externals stubbed.

        Returns ``(exit_code, captured_commands)`` where ``captured_commands``
        holds a copy of every command list passed to the stubbed
        ``subprocess.run``.
        """
        self._patch_summary_env(module, tmp_path, monkeypatch)

        def fake_resolver(agent: str):
            assert agent == "auditor", f"Expected auditor agent, got {agent!r}"
            return runtime_config

        monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver)

        captured_commands: list[list[str]] = []

        def fake_run(command, **kwargs):
            # Record a copy so later mutation of the list cannot skew asserts.
            captured_commands.append(list(command))
            return module.subprocess.CompletedProcess(command, 0)

        monkeypatch.setattr(module.subprocess, "run", fake_run)

        code = module.run_sweep_summary(files, [])
        return code, captured_commands

    def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch):
        """When model and variant are resolved, --model and --variant appear in the command."""
        module = _load_run_sweep()
        fake_rc = self._make_runtime_config(
            model="test-provider/test-model",
            variant="test-variant",
            model_source="env CODECOME_MODEL",
            variant_source="env CODECOME_MODEL_VARIANT",
        )

        code, captured_commands = self._run_summary(
            module, tmp_path, monkeypatch, fake_rc, ["src/a.php"]
        )

        assert code == 0
        assert len(captured_commands) == 1
        cmd = captured_commands[0]
        assert "--model" in cmd
        model_idx = cmd.index("--model")
        assert cmd[model_idx + 1] == "test-provider/test-model"
        assert "--variant" in cmd
        variant_idx = cmd.index("--variant")
        assert cmd[variant_idx + 1] == "test-variant"
        assert "--thinking" not in cmd
        assert cmd[0] == "opencode"
        assert cmd[1] == "run"
        assert "--agent" in cmd and cmd[cmd.index("--agent") + 1] == "auditor"

    def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch):
        """When thinking is on, --thinking appears in the command."""
        module = _load_run_sweep()
        fake_rc = self._make_runtime_config(
            thinking_on=True,
            thinking_source="provider-default",
        )

        code, captured_commands = self._run_summary(
            module, tmp_path, monkeypatch, fake_rc, ["src/b.cs"]
        )

        assert code == 0
        assert len(captured_commands) == 1
        cmd = captured_commands[0]
        assert "--thinking" in cmd
        assert "--model" not in cmd
        assert "--variant" not in cmd

    def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch):
        """When model/variant are None and thinking is off, no extra flags are passed."""
        module = _load_run_sweep()
        fake_rc = self._make_runtime_config()

        code, captured_commands = self._run_summary(
            module, tmp_path, monkeypatch, fake_rc, ["src/c.py"]
        )

        assert code == 0
        assert len(captured_commands) == 1
        cmd = captured_commands[0]
        assert "--model" not in cmd
        assert "--variant" not in cmd
        assert "--thinking" not in cmd
        assert cmd[:3] == ["opencode", "run", "--agent"]
not participate in the Phase 2 completion gate and ``run-agent.py`` does not currently support non-phase utility prompts. + + Model and variant resolution mirrors the per-file sweep runs so + that ``CODECOME_MODEL``, ``CODECOME_MODEL_VARIANT``, ``OPENCODE_ARGS``, + and ``codecome.yml`` agent pinning are all honoured for the aggregate + step. """ + rc = resolve_runtime_config("auditor") prompt_path = build_sweep_summary_prompt(files, per_file_summaries) print(C.header("Sweep Summary (Aggregate Rollup)")) print(f"Prompt: {prompt_path.relative_to(ROOT)}") + print(f" agent=auditor model={rc.model or '(unknown)'} " + f"variant={rc.variant or '(unknown)'} " + f"thinking={'on' if rc.thinking_on else 'off'}" + f" (model source: {rc.model_source}, variant source: {rc.variant_source})") prompt = prompt_path.read_text(encoding="utf-8") - command = ["opencode", "run", "--agent", "auditor", prompt] + command = ["opencode", "run", "--agent", "auditor"] + if rc.model: + command.extend(["--model", rc.model]) + if rc.variant: + command.extend(["--variant", rc.variant]) + if rc.thinking_on: + command.append("--thinking") + command.append(prompt) result = subprocess.run(command, cwd=ROOT) return int(result.returncode)