diff --git a/docs/adr/0002-ai-execution-strategy.md b/docs/adr/0002-ai-execution-strategy.md index 3b87dd0..7ec1a27 100644 --- a/docs/adr/0002-ai-execution-strategy.md +++ b/docs/adr/0002-ai-execution-strategy.md @@ -39,7 +39,7 @@ Implemented safety behavior: - If `copilot-cli` is requested without either token secret, the workflow logs a warning and uses `AI_PROVIDER=fake`. - [../../src/wazzup/ai.py](../../src/wazzup/ai.py) checks for `COPILOT_GITHUB_TOKEN` in GitHub Actions and raises an actionable error if the workflow guard is bypassed. - The provider defaults to model `claude-sonnet-4.6` and the repo-local `wazzup-writer` custom agent, both overridable through environment variables. -- Copilot CLI stdout/stderr is captured and included in sanitized failure diagnostics when the CLI exits non-zero. +- Copilot CLI runtime failures are retried, and stdout/stderr is captured and included in sanitized failure diagnostics when all attempts exit non-zero. ## Consequences @@ -57,7 +57,7 @@ Implemented safety behavior: - CLI behavior can be less predictable than a direct structured-output API and must be validated defensively. - Usage accounting may be less precise than direct API token accounting. - Ollama on GitHub-hosted runners can be slow and model downloads can dominate runtime; Foundry or other provider paths will require separate evaluation. -- Fake-provider fallback keeps automation green but produces deterministic placeholder-style summaries instead of true AI summaries until Copilot token setup is complete. +- Fake-provider fallback is limited to missing-token setup. Once `copilot-cli` is selected, runtime or structured-output failures fail the scheduled run so a later run can retry without publishing deterministic placeholder summaries. ## Alternatives considered diff --git a/docs/github-actions.md b/docs/github-actions.md index c154323..f53afe1 100644 --- a/docs/github-actions.md +++ b/docs/github-actions.md @@ -161,7 +161,7 @@ The workflow triggers hourly because GitHub cron is UTC-only and does not unders Operational learning: the first live News hourly run failed because Copilot CLI was requested but the token secret was empty. The workflow now selects an effective provider before installing Node/Copilot. If `copilot-cli` is requested without `COPILOT_REQUESTS_PAT` or `COPILOT_GITHUB_TOKEN`, it logs a warning and uses `AI_PROVIDER=fake` so the release state and Pages deployment path can still be validated end to end. -After enabling the Copilot PAT, one live run failed because Copilot CLI wrote JSON without the required `sections` array. The provider now treats invalid structured Copilot output as an AI-provider failure and falls back to the deterministic summary shape, recording `provider.type: copilot-cli-fallback` and the validation reason instead of failing the whole state/deploy pipeline. +After enabling the Copilot PAT, live runs showed that Copilot CLI can fail transiently or return malformed structured output. The provider retries Copilot CLI runtime failures and then fails with captured stdout/stderr diagnostics, while invalid structured output fails validation before publication so a later scheduled run can try again without publishing deterministic fake content. The Copilot CLI provider pins the briefing writer to `COPILOT_MODEL`, defaulting to Claude Sonnet 4.6 via the CLI model ID `claude-sonnet-4.6`, and invokes the repo-local `wazzup-writer` custom agent. This avoids the CLI's higher-cost default model while keeping the model overridable for manual canaries. diff --git a/src/wazzup/ai.py b/src/wazzup/ai.py index adf4283..f9df68d 100644 --- a/src/wazzup/ai.py +++ b/src/wazzup/ai.py @@ -96,6 +96,7 @@ def generate_transparency_report(self, request: TransparencyReportRequest) -> Tr DEFAULT_COPILOT_AGENT = "wazzup-writer" DEFAULT_COPILOT_CURATOR_AGENT = "wazzup-curator" DEFAULT_COPILOT_TRANSPARENCY_AGENT = "wazzup-transparency-reporter" +COPILOT_CLI_MAX_ATTEMPTS = 2 MAX_SUMMARY_HEADLINE_LENGTH = 80 MAX_SUMMARY_TITLE_LENGTH = 96 MAX_SUMMARY_DESCRIPTION_LENGTH = 220 @@ -231,6 +232,28 @@ def generate_transparency_report(self, request: TransparencyReportRequest) -> Tr ) +def run_copilot_cli(command: list[str], run_env: dict[str, str], failure_label: str) -> subprocess.CompletedProcess[str]: + failed_results: list[subprocess.CompletedProcess[str]] = [] + for _ in range(COPILOT_CLI_MAX_ATTEMPTS): + result = subprocess.run(command, capture_output=True, cwd=Path.cwd(), env=run_env, text=True) + if result.returncode == 0: + return result + failed_results.append(result) + + result = failed_results[-1] + details = [] + if result.stdout.strip(): + details.append(f"stdout: {result.stdout.strip()}") + if result.stderr.strip(): + details.append(f"stderr: {result.stderr.strip()}") + detail_text = "\n" + "\n".join(details) if details else "" + raise RuntimeError( + f"{failure_label} failed with exit code {result.returncode} after {COPILOT_CLI_MAX_ATTEMPTS} attempts. " + "Verify COPILOT_GITHUB_TOKEN has Copilot Requests permission, or use AI_PROVIDER=fake." + f"{detail_text}" + ) + + class CopilotCliCurationProvider: name = "copilot-cli" @@ -289,37 +312,13 @@ def curate_items(self, request: CurationRequest) -> CurationResponse: "--no-ask-user", ] ) - result = subprocess.run(command, capture_output=True, cwd=Path.cwd(), env=run_env, text=True) - if result.returncode != 0: - details = [] - if result.stdout.strip(): - details.append(f"stdout: {result.stdout.strip()}") - if result.stderr.strip(): - details.append(f"stderr: {result.stderr.strip()}") - detail_text = "\n" + "\n".join(details) if details else "" - raise RuntimeError( - f"Copilot CLI curation failed with exit code {result.returncode}. " - "Verify COPILOT_GITHUB_TOKEN has Copilot Requests permission, " - "or use AI_PROVIDER=fake." - f"{detail_text}" - ) + run_copilot_cli(command, run_env, "Copilot CLI curation") if not output_path.exists(): raise RuntimeError("Copilot CLI did not write curation-output.json") payload = json.loads(output_path.read_text(encoding="utf-8")) selected_ids = payload.get("selectedIds") if not isinstance(selected_ids, list) or not all(isinstance(item_id, str) for item_id in selected_ids): - fallback = FakeCurationProvider() - fallback_response = fallback.curate_items(request) - return CurationResponse( - selected_ids=fallback_response.selected_ids, - provider={ - **fallback_response.provider, - "type": "copilot-cli-fallback", - "fallbackFrom": self.name, - "fallbackReason": "Curator returned invalid selectedIds", - "validated": True, - }, - ) + raise ValueError("Copilot CLI curation returned invalid selectedIds") provider = { "type": self.name, "model": payload.get("model", self.model or "copilot-cli"), @@ -389,45 +388,18 @@ def generate_structured_summary(self, request: SummaryRequest) -> SummaryRespons "--no-ask-user", ] ) - result = subprocess.run(command, capture_output=True, cwd=Path.cwd(), env=run_env, text=True) - if result.returncode != 0: - details = [] - if result.stdout.strip(): - details.append(f"stdout: {result.stdout.strip()}") - if result.stderr.strip(): - details.append(f"stderr: {result.stderr.strip()}") - detail_text = "\n" + "\n".join(details) if details else "" - raise RuntimeError( - f"Copilot CLI failed with exit code {result.returncode}. " - "Verify COPILOT_GITHUB_TOKEN has Copilot Requests permission, " - "or use AI_PROVIDER=fake." - f"{detail_text}" - ) + run_copilot_cli(command, run_env, "Copilot CLI") if not output_path.exists(): raise RuntimeError("Copilot CLI did not write summary.json") payload = json.loads(output_path.read_text(encoding="utf-8")) - provider = { - "type": self.name, - "model": payload.get("model", self.model or "copilot-cli"), - "agent": self.agent or None, - "promptVersion": "summary-v1", - "validated": True, - } - try: + provider = { + "type": self.name, + "model": payload.get("model", self.model or "copilot-cli"), + "agent": self.agent or None, + "promptVersion": "summary-v1", + "validated": True, + } return response_from_payload(payload, provider=provider) - except ValueError as exc: - fallback = FakeSummaryProvider().generate_structured_summary(request) - return SummaryResponse( - headline=fallback.headline, - sections=fallback.sections, - provider={ - **fallback.provider, - "type": "copilot-cli-fallback", - "fallbackFrom": self.name, - "fallbackReason": str(exc), - "validated": True, - }, - ) class CopilotCliTransparencyReportProvider: diff --git a/tests/test_ai.py b/tests/test_ai.py index b43f3d4..0afc776 100644 --- a/tests/test_ai.py +++ b/tests/test_ai.py @@ -11,6 +11,7 @@ CopilotCliCurationProvider, CopilotCliSummaryProvider, CopilotCliTransparencyReportProvider, + COPILOT_CLI_MAX_ATTEMPTS, CurationRequest, DEFAULT_COPILOT_AGENT, DEFAULT_COPILOT_CURATOR_AGENT, @@ -372,7 +373,7 @@ def test_copilot_requires_token_in_github_actions(self, _which) -> None: # type @patch("wazzup.ai.subprocess.run") @patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot") - def test_copilot_invalid_payload_falls_back_to_deterministic_summary(self, _which, run_mock) -> None: # type: ignore[no-untyped-def] + def test_copilot_invalid_payload_fails_summary_generation(self, _which, run_mock) -> None: # type: ignore[no-untyped-def] previous_token = os.environ.get("COPILOT_GITHUB_TOKEN") os.environ["COPILOT_GITHUB_TOKEN"] = "test-token" source = load_sources("config/sources.yml")[0] @@ -387,26 +388,57 @@ def fake_run(_command, capture_output, cwd, env, text): # type: ignore[no-untyp run_mock.side_effect = fake_run try: - response = CopilotCliSummaryProvider().generate_structured_summary( - SummaryRequest( - kind="hourly", - window_start="2026-05-06T00:00:00Z", - window_end="2026-05-06T21:00:00Z", - generated_at="2026-05-06T21:00:00Z", - timezone="Europe/Amsterdam", - summary_language="en", - items=scored, + with self.assertRaisesRegex(ValueError, "missing sections"): + CopilotCliSummaryProvider().generate_structured_summary( + SummaryRequest( + kind="hourly", + window_start="2026-05-06T00:00:00Z", + window_end="2026-05-06T21:00:00Z", + generated_at="2026-05-06T21:00:00Z", + timezone="Europe/Amsterdam", + summary_language="en", + items=scored, + ) ) - ) finally: if previous_token is None: os.environ.pop("COPILOT_GITHUB_TOKEN", None) else: os.environ["COPILOT_GITHUB_TOKEN"] = previous_token - self.assertEqual("copilot-cli-fallback", response.provider["type"]) - self.assertIn("fallbackReason", response.provider) - self.assertTrue(response.sections[0]["bullets"]) + self.assertEqual(1, run_mock.call_count) + + @patch("wazzup.ai.subprocess.run") + @patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot") + def test_copilot_summary_retries_then_fails_on_runtime_failure(self, _which, run_mock) -> None: # type: ignore[no-untyped-def] + previous_token = os.environ.get("COPILOT_GITHUB_TOKEN") + os.environ["COPILOT_GITHUB_TOKEN"] = "test-token" + source = load_sources("config/sources.yml")[0] + item = parse_feed(source, Path("tests/fixtures/microsoft-security-blog.xml").read_bytes())[0] + scored = score_items([item], [source], load_app_config("config/interests.yml"), datetime(2026, 5, 6, tzinfo=UTC)) + run_mock.return_value = Mock(returncode=1, stdout="failed", stderr="upstream error") + try: + with self.assertRaisesRegex(RuntimeError, f"after {COPILOT_CLI_MAX_ATTEMPTS} attempts") as context: + CopilotCliSummaryProvider().generate_structured_summary( + SummaryRequest( + kind="hourly", + window_start="2026-05-06T00:00:00Z", + window_end="2026-05-06T21:00:00Z", + generated_at="2026-05-06T21:00:00Z", + timezone="Europe/Amsterdam", + summary_language="en", + items=scored, + ) + ) + finally: + if previous_token is None: + os.environ.pop("COPILOT_GITHUB_TOKEN", None) + else: + os.environ["COPILOT_GITHUB_TOKEN"] = previous_token + + self.assertEqual(COPILOT_CLI_MAX_ATTEMPTS, run_mock.call_count) + self.assertIn("stdout: failed", str(context.exception)) + self.assertIn("stderr: upstream error", str(context.exception)) class AiCurationProviderTests(unittest.TestCase): @@ -569,7 +601,7 @@ def fake_run(command, capture_output, cwd, env, text): # type: ignore[no-untype @patch("wazzup.ai.subprocess.run") @patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot") - def test_copilot_cli_curation_falls_back_on_invalid_response(self, _which, run_mock) -> None: # type: ignore[no-untyped-def] + def test_copilot_cli_curation_fails_on_invalid_response(self, _which, run_mock) -> None: # type: ignore[no-untyped-def] previous_token = os.environ.get("COPILOT_GITHUB_TOKEN") os.environ["COPILOT_GITHUB_TOKEN"] = "test-token" source = load_sources("config/sources.yml")[0] @@ -584,26 +616,57 @@ def fake_run(_command, capture_output, cwd, env, text): # type: ignore[no-untyp run_mock.side_effect = fake_run try: - response = CopilotCliCurationProvider().curate_items( - CurationRequest( - kind="hourly", - window_start="2026-05-06T20:00:00Z", - window_end="2026-05-06T21:00:00Z", - generated_at="2026-05-06T21:00:00Z", - timezone="Europe/Amsterdam", - items=scored, - max_items=12, + with self.assertRaisesRegex(ValueError, "invalid selectedIds"): + CopilotCliCurationProvider().curate_items( + CurationRequest( + kind="hourly", + window_start="2026-05-06T20:00:00Z", + window_end="2026-05-06T21:00:00Z", + generated_at="2026-05-06T21:00:00Z", + timezone="Europe/Amsterdam", + items=scored, + max_items=12, + ) ) - ) finally: if previous_token is None: os.environ.pop("COPILOT_GITHUB_TOKEN", None) else: os.environ["COPILOT_GITHUB_TOKEN"] = previous_token - self.assertEqual("copilot-cli-fallback", response.provider["type"]) - self.assertIn("fallbackReason", response.provider) - self.assertTrue(response.selected_ids) + self.assertEqual(1, run_mock.call_count) + + @patch("wazzup.ai.subprocess.run") + @patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot") + def test_copilot_cli_curation_retries_then_fails_on_runtime_failure(self, _which, run_mock) -> None: # type: ignore[no-untyped-def] + previous_token = os.environ.get("COPILOT_GITHUB_TOKEN") + os.environ["COPILOT_GITHUB_TOKEN"] = "test-token" + source = load_sources("config/sources.yml")[0] + item = parse_feed(source, Path("tests/fixtures/microsoft-security-blog.xml").read_bytes())[0] + scored = score_items([item], [source], load_app_config("config/interests.yml"), datetime(2026, 5, 6, tzinfo=UTC)) + run_mock.return_value = Mock(returncode=1, stdout="failed", stderr="upstream error") + try: + with self.assertRaisesRegex(RuntimeError, f"after {COPILOT_CLI_MAX_ATTEMPTS} attempts") as context: + CopilotCliCurationProvider().curate_items( + CurationRequest( + kind="hourly", + window_start="2026-05-06T20:00:00Z", + window_end="2026-05-06T21:00:00Z", + generated_at="2026-05-06T21:00:00Z", + timezone="Europe/Amsterdam", + items=scored, + max_items=12, + ) + ) + finally: + if previous_token is None: + os.environ.pop("COPILOT_GITHUB_TOKEN", None) + else: + os.environ["COPILOT_GITHUB_TOKEN"] = previous_token + + self.assertEqual(COPILOT_CLI_MAX_ATTEMPTS, run_mock.call_count) + self.assertIn("stdout: failed", str(context.exception)) + self.assertIn("stderr: upstream error", str(context.exception)) class AiTransparencyReportProviderTests(unittest.TestCase):