Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/adr/0002-ai-execution-strategy.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Implemented safety behavior:
- If `copilot-cli` is requested without either token secret, the workflow logs a warning and uses `AI_PROVIDER=fake`.
- [../../src/wazzup/ai.py](../../src/wazzup/ai.py) checks for `COPILOT_GITHUB_TOKEN` in GitHub Actions and raises an actionable error if the workflow guard is bypassed.
- The provider defaults to model `claude-sonnet-4.6` and the repo-local `wazzup-writer` custom agent, both overridable through environment variables.
- Copilot CLI stdout/stderr is captured and included in sanitized failure diagnostics when the CLI exits non-zero.
- Copilot CLI runtime failures are retried, and stdout/stderr is captured and included in sanitized failure diagnostics when all attempts exit non-zero.

## Consequences

Expand All @@ -57,7 +57,7 @@ Implemented safety behavior:
- CLI behavior can be less predictable than a direct structured-output API and must be validated defensively.
- Usage accounting may be less precise than direct API token accounting.
- Ollama on GitHub-hosted runners can be slow and model downloads can dominate runtime; Foundry or other provider paths will require separate evaluation.
- Fake-provider fallback keeps automation green but produces deterministic placeholder-style summaries instead of true AI summaries until Copilot token setup is complete.
- Fake-provider fallback is limited to missing-token setup. Once `copilot-cli` is selected, runtime or structured-output failures fail the scheduled run so a later run can retry without publishing deterministic placeholder summaries.

## Alternatives considered

Expand Down
2 changes: 1 addition & 1 deletion docs/github-actions.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ The workflow triggers hourly because GitHub cron is UTC-only and does not unders

Operational learning: the first live News hourly run failed because Copilot CLI was requested but the token secret was empty. The workflow now selects an effective provider before installing Node/Copilot. If `copilot-cli` is requested without `COPILOT_REQUESTS_PAT` or `COPILOT_GITHUB_TOKEN`, it logs a warning and uses `AI_PROVIDER=fake` so the release state and Pages deployment path can still be validated end to end.

After enabling the Copilot PAT, one live run failed because Copilot CLI wrote JSON without the required `sections` array. The provider now treats invalid structured Copilot output as an AI-provider failure and falls back to the deterministic summary shape, recording `provider.type: copilot-cli-fallback` and the validation reason instead of failing the whole state/deploy pipeline.
After enabling the Copilot PAT, live runs showed that Copilot CLI can fail transiently or return malformed structured output. The provider retries Copilot CLI runtime failures and then fails with captured stdout/stderr diagnostics, while invalid structured output fails validation before publication so a later scheduled run can try again without publishing deterministic fake content.

The Copilot CLI provider pins the briefing writer to `COPILOT_MODEL`, defaulting to Claude Sonnet 4.6 via the CLI model ID `claude-sonnet-4.6`, and invokes the repo-local `wazzup-writer` custom agent. This avoids the CLI's higher-cost default model while keeping the model overridable for manual canaries.

Expand Down
94 changes: 33 additions & 61 deletions src/wazzup/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def generate_transparency_report(self, request: TransparencyReportRequest) -> Tr
DEFAULT_COPILOT_AGENT = "wazzup-writer"
DEFAULT_COPILOT_CURATOR_AGENT = "wazzup-curator"
DEFAULT_COPILOT_TRANSPARENCY_AGENT = "wazzup-transparency-reporter"
COPILOT_CLI_MAX_ATTEMPTS = 2
MAX_SUMMARY_HEADLINE_LENGTH = 80
MAX_SUMMARY_TITLE_LENGTH = 96
MAX_SUMMARY_DESCRIPTION_LENGTH = 220
Expand Down Expand Up @@ -231,6 +232,28 @@ def generate_transparency_report(self, request: TransparencyReportRequest) -> Tr
)


def run_copilot_cli(command: list[str], run_env: dict[str, str], failure_label: str) -> subprocess.CompletedProcess[str]:
failed_results: list[subprocess.CompletedProcess[str]] = []
for _ in range(COPILOT_CLI_MAX_ATTEMPTS):
result = subprocess.run(command, capture_output=True, cwd=Path.cwd(), env=run_env, text=True)
if result.returncode == 0:
return result
failed_results.append(result)

result = failed_results[-1]
details = []
if result.stdout.strip():
details.append(f"stdout: {result.stdout.strip()}")
if result.stderr.strip():
details.append(f"stderr: {result.stderr.strip()}")
detail_text = "\n" + "\n".join(details) if details else ""
raise RuntimeError(
f"{failure_label} failed with exit code {result.returncode} after {COPILOT_CLI_MAX_ATTEMPTS} attempts. "
"Verify COPILOT_GITHUB_TOKEN has Copilot Requests permission, or use AI_PROVIDER=fake."
f"{detail_text}"
)


class CopilotCliCurationProvider:
name = "copilot-cli"

Expand Down Expand Up @@ -289,37 +312,13 @@ def curate_items(self, request: CurationRequest) -> CurationResponse:
"--no-ask-user",
]
)
result = subprocess.run(command, capture_output=True, cwd=Path.cwd(), env=run_env, text=True)
if result.returncode != 0:
details = []
if result.stdout.strip():
details.append(f"stdout: {result.stdout.strip()}")
if result.stderr.strip():
details.append(f"stderr: {result.stderr.strip()}")
detail_text = "\n" + "\n".join(details) if details else ""
raise RuntimeError(
f"Copilot CLI curation failed with exit code {result.returncode}. "
"Verify COPILOT_GITHUB_TOKEN has Copilot Requests permission, "
"or use AI_PROVIDER=fake."
f"{detail_text}"
)
run_copilot_cli(command, run_env, "Copilot CLI curation")
if not output_path.exists():
raise RuntimeError("Copilot CLI did not write curation-output.json")
payload = json.loads(output_path.read_text(encoding="utf-8"))
selected_ids = payload.get("selectedIds")
if not isinstance(selected_ids, list) or not all(isinstance(item_id, str) for item_id in selected_ids):
fallback = FakeCurationProvider()
fallback_response = fallback.curate_items(request)
return CurationResponse(
selected_ids=fallback_response.selected_ids,
provider={
**fallback_response.provider,
"type": "copilot-cli-fallback",
"fallbackFrom": self.name,
"fallbackReason": "Curator returned invalid selectedIds",
"validated": True,
},
)
raise ValueError("Copilot CLI curation returned invalid selectedIds")
provider = {
"type": self.name,
"model": payload.get("model", self.model or "copilot-cli"),
Expand Down Expand Up @@ -389,45 +388,18 @@ def generate_structured_summary(self, request: SummaryRequest) -> SummaryRespons
"--no-ask-user",
]
)
result = subprocess.run(command, capture_output=True, cwd=Path.cwd(), env=run_env, text=True)
if result.returncode != 0:
details = []
if result.stdout.strip():
details.append(f"stdout: {result.stdout.strip()}")
if result.stderr.strip():
details.append(f"stderr: {result.stderr.strip()}")
detail_text = "\n" + "\n".join(details) if details else ""
raise RuntimeError(
f"Copilot CLI failed with exit code {result.returncode}. "
"Verify COPILOT_GITHUB_TOKEN has Copilot Requests permission, "
"or use AI_PROVIDER=fake."
f"{detail_text}"
)
run_copilot_cli(command, run_env, "Copilot CLI")
if not output_path.exists():
raise RuntimeError("Copilot CLI did not write summary.json")
payload = json.loads(output_path.read_text(encoding="utf-8"))
provider = {
"type": self.name,
"model": payload.get("model", self.model or "copilot-cli"),
"agent": self.agent or None,
"promptVersion": "summary-v1",
"validated": True,
}
try:
provider = {
"type": self.name,
"model": payload.get("model", self.model or "copilot-cli"),
"agent": self.agent or None,
"promptVersion": "summary-v1",
"validated": True,
}
return response_from_payload(payload, provider=provider)
except ValueError as exc:
fallback = FakeSummaryProvider().generate_structured_summary(request)
return SummaryResponse(
headline=fallback.headline,
sections=fallback.sections,
provider={
**fallback.provider,
"type": "copilot-cli-fallback",
"fallbackFrom": self.name,
"fallbackReason": str(exc),
"validated": True,
},
)


class CopilotCliTransparencyReportProvider:
Expand Down
119 changes: 91 additions & 28 deletions tests/test_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CopilotCliCurationProvider,
CopilotCliSummaryProvider,
CopilotCliTransparencyReportProvider,
COPILOT_CLI_MAX_ATTEMPTS,
CurationRequest,
DEFAULT_COPILOT_AGENT,
DEFAULT_COPILOT_CURATOR_AGENT,
Expand Down Expand Up @@ -372,7 +373,7 @@ def test_copilot_requires_token_in_github_actions(self, _which) -> None: # type

@patch("wazzup.ai.subprocess.run")
@patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot")
def test_copilot_invalid_payload_falls_back_to_deterministic_summary(self, _which, run_mock) -> None: # type: ignore[no-untyped-def]
def test_copilot_invalid_payload_fails_summary_generation(self, _which, run_mock) -> None: # type: ignore[no-untyped-def]
previous_token = os.environ.get("COPILOT_GITHUB_TOKEN")
os.environ["COPILOT_GITHUB_TOKEN"] = "test-token"
source = load_sources("config/sources.yml")[0]
Expand All @@ -387,26 +388,57 @@ def fake_run(_command, capture_output, cwd, env, text): # type: ignore[no-untyp

run_mock.side_effect = fake_run
try:
response = CopilotCliSummaryProvider().generate_structured_summary(
SummaryRequest(
kind="hourly",
window_start="2026-05-06T00:00:00Z",
window_end="2026-05-06T21:00:00Z",
generated_at="2026-05-06T21:00:00Z",
timezone="Europe/Amsterdam",
summary_language="en",
items=scored,
with self.assertRaisesRegex(ValueError, "missing sections"):
CopilotCliSummaryProvider().generate_structured_summary(
SummaryRequest(
kind="hourly",
window_start="2026-05-06T00:00:00Z",
window_end="2026-05-06T21:00:00Z",
generated_at="2026-05-06T21:00:00Z",
timezone="Europe/Amsterdam",
summary_language="en",
items=scored,
)
)
)
finally:
if previous_token is None:
os.environ.pop("COPILOT_GITHUB_TOKEN", None)
else:
os.environ["COPILOT_GITHUB_TOKEN"] = previous_token

self.assertEqual("copilot-cli-fallback", response.provider["type"])
self.assertIn("fallbackReason", response.provider)
self.assertTrue(response.sections[0]["bullets"])
self.assertEqual(1, run_mock.call_count)

@patch("wazzup.ai.subprocess.run")
@patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot")
def test_copilot_summary_retries_then_fails_on_runtime_failure(self, _which, run_mock) -> None: # type: ignore[no-untyped-def]
previous_token = os.environ.get("COPILOT_GITHUB_TOKEN")
os.environ["COPILOT_GITHUB_TOKEN"] = "test-token"
source = load_sources("config/sources.yml")[0]
item = parse_feed(source, Path("tests/fixtures/microsoft-security-blog.xml").read_bytes())[0]
scored = score_items([item], [source], load_app_config("config/interests.yml"), datetime(2026, 5, 6, tzinfo=UTC))
run_mock.return_value = Mock(returncode=1, stdout="failed", stderr="upstream error")
try:
with self.assertRaisesRegex(RuntimeError, f"after {COPILOT_CLI_MAX_ATTEMPTS} attempts") as context:
CopilotCliSummaryProvider().generate_structured_summary(
SummaryRequest(
kind="hourly",
window_start="2026-05-06T00:00:00Z",
window_end="2026-05-06T21:00:00Z",
generated_at="2026-05-06T21:00:00Z",
timezone="Europe/Amsterdam",
summary_language="en",
items=scored,
)
)
finally:
if previous_token is None:
os.environ.pop("COPILOT_GITHUB_TOKEN", None)
else:
os.environ["COPILOT_GITHUB_TOKEN"] = previous_token

self.assertEqual(COPILOT_CLI_MAX_ATTEMPTS, run_mock.call_count)
self.assertIn("stdout: failed", str(context.exception))
self.assertIn("stderr: upstream error", str(context.exception))


class AiCurationProviderTests(unittest.TestCase):
Expand Down Expand Up @@ -569,7 +601,7 @@ def fake_run(command, capture_output, cwd, env, text): # type: ignore[no-untype

@patch("wazzup.ai.subprocess.run")
@patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot")
def test_copilot_cli_curation_falls_back_on_invalid_response(self, _which, run_mock) -> None: # type: ignore[no-untyped-def]
def test_copilot_cli_curation_fails_on_invalid_response(self, _which, run_mock) -> None: # type: ignore[no-untyped-def]
previous_token = os.environ.get("COPILOT_GITHUB_TOKEN")
os.environ["COPILOT_GITHUB_TOKEN"] = "test-token"
source = load_sources("config/sources.yml")[0]
Expand All @@ -584,26 +616,57 @@ def fake_run(_command, capture_output, cwd, env, text): # type: ignore[no-untyp

run_mock.side_effect = fake_run
try:
response = CopilotCliCurationProvider().curate_items(
CurationRequest(
kind="hourly",
window_start="2026-05-06T20:00:00Z",
window_end="2026-05-06T21:00:00Z",
generated_at="2026-05-06T21:00:00Z",
timezone="Europe/Amsterdam",
items=scored,
max_items=12,
with self.assertRaisesRegex(ValueError, "invalid selectedIds"):
CopilotCliCurationProvider().curate_items(
CurationRequest(
kind="hourly",
window_start="2026-05-06T20:00:00Z",
window_end="2026-05-06T21:00:00Z",
generated_at="2026-05-06T21:00:00Z",
timezone="Europe/Amsterdam",
items=scored,
max_items=12,
)
)
)
finally:
if previous_token is None:
os.environ.pop("COPILOT_GITHUB_TOKEN", None)
else:
os.environ["COPILOT_GITHUB_TOKEN"] = previous_token

self.assertEqual("copilot-cli-fallback", response.provider["type"])
self.assertIn("fallbackReason", response.provider)
self.assertTrue(response.selected_ids)
self.assertEqual(1, run_mock.call_count)

@patch("wazzup.ai.subprocess.run")
@patch("wazzup.ai.shutil.which", return_value="/usr/bin/copilot")
def test_copilot_cli_curation_retries_then_fails_on_runtime_failure(self, _which, run_mock) -> None: # type: ignore[no-untyped-def]
previous_token = os.environ.get("COPILOT_GITHUB_TOKEN")
os.environ["COPILOT_GITHUB_TOKEN"] = "test-token"
source = load_sources("config/sources.yml")[0]
item = parse_feed(source, Path("tests/fixtures/microsoft-security-blog.xml").read_bytes())[0]
scored = score_items([item], [source], load_app_config("config/interests.yml"), datetime(2026, 5, 6, tzinfo=UTC))
run_mock.return_value = Mock(returncode=1, stdout="failed", stderr="upstream error")
try:
with self.assertRaisesRegex(RuntimeError, f"after {COPILOT_CLI_MAX_ATTEMPTS} attempts") as context:
CopilotCliCurationProvider().curate_items(
CurationRequest(
kind="hourly",
window_start="2026-05-06T20:00:00Z",
window_end="2026-05-06T21:00:00Z",
generated_at="2026-05-06T21:00:00Z",
timezone="Europe/Amsterdam",
items=scored,
max_items=12,
)
)
finally:
if previous_token is None:
os.environ.pop("COPILOT_GITHUB_TOKEN", None)
else:
os.environ["COPILOT_GITHUB_TOKEN"] = previous_token

self.assertEqual(COPILOT_CLI_MAX_ATTEMPTS, run_mock.call_count)
self.assertIn("stdout: failed", str(context.exception))
self.assertIn("stderr: upstream error", str(context.exception))


class AiTransparencyReportProviderTests(unittest.TestCase):
Expand Down