From f4881dd870ea33e95ef6e50700c6c81f11793a4e Mon Sep 17 00:00:00 2001 From: Scott Raisbeck Date: Mon, 20 Apr 2026 21:00:31 +0100 Subject: [PATCH 1/2] feat: add GitHub Copilot CLI adapter with reasoning support Add CopilotCLIAdapter implementing the AgentAdapter protocol, enabling agent-shell to execute Copilot CLI as a supported agent type alongside Claude Code and OpenCode. Key details: - Maps Copilot CLI NDJSON events (reasoning_delta, message_delta, result, etc.) to shared StreamEvent types - Requires --enable-reasoning-summaries flag for thinking/reasoning events - Cost defaults to 0.0 (Copilot doesn't expose pricing) - Session ID extracted from the result event (not turn_start) - Passes --allow-all-tools when auto_approve is enabled (the default), and additionally passes one --allow-tool flag per entry when an allowed_tools list is specified 148 unit+integration tests + 10 E2E tests all passing (174 total). --- README.md | 29 +- .../adapters/copilot_cli_adapter.py | 194 +++++++++ src/agent_shell/shell.py | 2 + tests/e2e/test_copilot_cli_e2e.py | 193 +++++++++ .../test_copilot_cli_integration.py | 369 ++++++++++++++++++ tests/unit/copilot_fixtures.py | 280 +++++++++++++ tests/unit/test_copilot_cli_cancel.py | 22 ++ tests/unit/test_copilot_cli_execute.py | 92 +++++ tests/unit/test_copilot_cli_parse_event.py | 339 ++++++++++++++++ tests/unit/test_copilot_cli_stream.py | 272 +++++++++++++ tests/unit/test_shell.py | 10 +- 11 files changed, 1800 insertions(+), 2 deletions(-) create mode 100644 src/agent_shell/adapters/copilot_cli_adapter.py create mode 100644 tests/e2e/test_copilot_cli_e2e.py create mode 100644 tests/integration/test_copilot_cli_integration.py create mode 100644 tests/unit/copilot_fixtures.py create mode 100644 tests/unit/test_copilot_cli_cancel.py create mode 100644 tests/unit/test_copilot_cli_execute.py create mode 100644 tests/unit/test_copilot_cli_parse_event.py create mode 100644 tests/unit/test_copilot_cli_stream.py diff --git a/README.md b/README.md index 7068512..980ba36 100644 --- 
a/README.md +++ b/README.md @@ -108,12 +108,39 @@ logging.getLogger("agent_shell").addHandler(logging.StreamHandler()) Set to `DEBUG` for raw JSON events and full command arguments. +## Copilot CLI + +```python +from agent_shell.shell import AgentShell +from agent_shell.models.agent import AgentType + +shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + +response = await shell.execute( + cwd="/path/to/project", + prompt="Can you tell me about this project?", + model="gpt-4o", +) + +print(response.response) +print(f"Session: {response.session_id}") + +# Resume the conversation using the session_id +follow_up = await shell.execute( + cwd="/path/to/project", + prompt="Now refactor the auth module based on your findings", + session_id=response.session_id, +) +``` + +> **Note:** Copilot CLI doesn't expose pricing data, so the `cost` field on `AgentResponse` is always `0.0`. The `duration` field on the final `result` stream event is populated from `usage.totalApiDurationMs`, converted from milliseconds to seconds. + ## Supported CLI Agents: - [x] Claude Code - [x] OpenCode +- [x] Copilot CLI - [ ] Gemini CLI -- [ ] Copilot CLI - [ ] Codex diff --git a/src/agent_shell/adapters/copilot_cli_adapter.py b/src/agent_shell/adapters/copilot_cli_adapter.py new file mode 100644 index 0000000..2108d10 --- /dev/null +++ b/src/agent_shell/adapters/copilot_cli_adapter.py @@ -0,0 +1,194 @@ +import asyncio +import json +import logging +import os +from typing import AsyncIterator + +from agent_shell.models.agent import AgentResponse, StreamEvent +from agent_shell.process_cleanup import register_process_group, unregister_process_group + +logger = logging.getLogger("agent_shell.copilot_cli_adapter") + + +class CopilotCLIAdapter: + def __init__(self): + self._active_processes = [] + + async def execute( + self, + cwd: str, + prompt: str, + allowed_tools: list[str] | None = None, + model: str | None = None, + effort: str | None = None, + include_thinking: bool = False, + auto_approve: bool = True, + session_id: str | None = None, + ) -> AgentResponse: + 
chunks: list[StreamEvent] = [] + async for event in self.stream( + cwd=cwd, + prompt=prompt, + allowed_tools=allowed_tools, + model=model, + effort=effort, + include_thinking=include_thinking, + auto_approve=auto_approve, + session_id=session_id, + ): + chunks.append(event) + + text = "\n".join(e.content for e in chunks if e.type == "text") + cost = next((e.cost for e in reversed(chunks) if e.type == "result"), 0.0) + returned_session_id = next((e.session_id for e in chunks if e.session_id), None) + return AgentResponse(response=text, cost=cost, session_id=returned_session_id) + + async def stream( + self, + cwd: str, + prompt: str, + allowed_tools: list[str] | None = None, + model: str | None = None, + effort: str | None = None, + include_thinking: bool = False, + auto_approve: bool = True, + session_id: str | None = None, + ) -> AsyncIterator[StreamEvent]: + cmd = [ + "copilot", "-p", prompt, + "--output-format", "json", + "--silent", + ] + + if auto_approve: + cmd.append("--allow-all-tools") + + if allowed_tools: + for tool in allowed_tools: + cmd.extend(["--allow-tool", tool]) + + if model: + cmd.extend(["--model", model]) + + if effort: + cmd.extend(["--effort", effort]) + + if session_id: + cmd.extend(["--resume", session_id]) + + if include_thinking: + cmd.append("--enable-reasoning-summaries") + + logger.debug("Command: %s", cmd) + logger.info("Process started (cwd=%s)", os.path.abspath(cwd)) + + process = await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.DEVNULL, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=os.path.abspath(cwd), + preexec_fn=os.setsid, + ) + + self._active_processes.append(process) + register_process_group(process.pid) + + buffer = "" + while True: + chunk = await process.stdout.read(65536) + if not chunk: + if buffer.strip(): + try: + raw = json.loads(buffer) + logger.debug("Raw event: %s", raw) + for event in self._parse_event( + event=raw, + include_thinking=include_thinking, + ): + 
yield event + except json.JSONDecodeError: + logger.warning("Skipping malformed JSON: %s", buffer[:200]) + break + + buffer += chunk.decode("utf-8") + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + if line.strip(): + try: + raw = json.loads(line) + logger.debug("Raw event: %s", raw) + for event in self._parse_event( + event=raw, + include_thinking=include_thinking, + ): + yield event + except json.JSONDecodeError: + logger.warning("Skipping malformed JSON: %s", line[:200]) + + await process.wait() + if process in self._active_processes: + self._active_processes.remove(process) + unregister_process_group(process.pid) + + stderr = await process.stderr.read() + if stderr and process.returncode != 0: + error_msg = stderr.decode("utf-8")[-500:] + logger.warning("Process exited with code %d: %s", process.returncode, error_msg) + yield StreamEvent(type="error", content=error_msg) + + def _parse_event(self, event: dict, include_thinking: bool) -> list[StreamEvent]: + t = event.get("type", "") + events = [] + + if t == "assistant.turn_start": + events.append(StreamEvent(type="system", content="")) + + elif t == "assistant.reasoning_delta" and include_thinking: + delta_content = event.get("data", {}).get("deltaContent", "") + if delta_content: + events.append(StreamEvent(type="thinking", content=delta_content)) + + elif t == "assistant.reasoning" and include_thinking: + content = event.get("data", {}).get("content", "") or event.get("content", "") + if content: + events.append(StreamEvent(type="thinking", content=content)) + + elif t == "assistant.message_delta": + delta_content = event.get("data", {}).get("deltaContent", "") + if delta_content: + events.append(StreamEvent(type="text", content=delta_content)) + + elif t == "assistant.message": + tool_requests = event.get("data", {}).get("toolRequests", []) + for tool in tool_requests: + tool_name = tool.get("name", "") + logger.info("Tool call: %s", tool_name) + events.append(StreamEvent(type="tool_use", 
content=tool_name)) + + elif t == "result": + exit_code = event.get("exitCode", 0) + status = "ok" if exit_code == 0 else "error" + usage = event.get("usage", {}) + duration = (usage.get("totalApiDurationMs", 0) or 0) / 1000 + session_id = event.get("sessionId") + logger.info("Result: %s (duration=%.1fs)", status, duration) + events.append(StreamEvent( + type="result", + content=status, + cost=0.0, + duration=duration, + session_id=session_id, + )) + + return events + + async def cancel(self) -> None: + for process in self._active_processes: + try: + pgid = os.getpgid(process.pid) + os.killpg(pgid, 9) + unregister_process_group(pgid) + except ProcessLookupError: + pass + self._active_processes.clear() diff --git a/src/agent_shell/shell.py b/src/agent_shell/shell.py index 97dc95b..aec8fe0 100644 --- a/src/agent_shell/shell.py +++ b/src/agent_shell/shell.py @@ -6,6 +6,7 @@ from agent_shell.adapters.agent_adapter_protocol import AgentAdapter from agent_shell.adapters.claude_code_adapter import ClaudeCodeAdapter from agent_shell.adapters.opencode_adapter import OpenCodeAdapter +from agent_shell.adapters.copilot_cli_adapter import CopilotCLIAdapter class AgentShell(): @@ -16,6 +17,7 @@ def _resolve_adapter(self, agent_type: AgentType) -> AgentAdapter: adapters = { AgentType.CLAUDE_CODE: ClaudeCodeAdapter, AgentType.OPENCODE: OpenCodeAdapter, + AgentType.COPILOT_CLI: CopilotCLIAdapter, } adapter_cls = adapters.get(agent_type) diff --git a/tests/e2e/test_copilot_cli_e2e.py b/tests/e2e/test_copilot_cli_e2e.py new file mode 100644 index 0000000..98918b5 --- /dev/null +++ b/tests/e2e/test_copilot_cli_e2e.py @@ -0,0 +1,193 @@ +import pytest + +from agent_shell.shell import AgentShell +from agent_shell.models.agent import AgentType, AgentResponse, StreamEvent + + +pytestmark = pytest.mark.e2e + + +class TestStreamE2E: + async def test_stream_yields_text_and_result_events(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + events: 
list[StreamEvent] = [] + async for event in shell.stream( + cwd="/tmp", + prompt="Respond with exactly: hello world", + allowed_tools=[], + ): + events.append(event) + + # Assert + text_events = [e for e in events if e.type == "text"] + result_events = [e for e in events if e.type == "result"] + + assert len(text_events) >= 1, "Expected at least one text event" + assert len(result_events) == 1, "Expected exactly one result event" + + async def test_stream_with_thinking_yields_thinking_events(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + events: list[StreamEvent] = [] + async for event in shell.stream( + cwd="/tmp", + prompt="Respond with exactly: hello world", + allowed_tools=[], + effort="high", + include_thinking=True, + ): + events.append(event) + + # Assert + thinking_events = [e for e in events if e.type == "thinking"] + assert len(thinking_events) >= 1, "Expected at least one thinking event" + + async def test_stream_with_tool_use(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + events: list[StreamEvent] = [] + async for event in shell.stream( + cwd="/tmp", + prompt="List the files in the current directory using the Bash tool", + allowed_tools=["Bash"], + ): + events.append(event) + + # Assert + tool_events = [e for e in events if e.type == "tool_use"] + assert len(tool_events) >= 1, "Expected at least one tool_use event" + + +class TestAutoApproveE2E: + async def test_stream_uses_tools_with_default_auto_approve(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + events: list[StreamEvent] = [] + async for event in shell.stream( + cwd="/tmp", + prompt="Use the Bash tool to echo 'auto approved'", + allowed_tools=["Bash"], + ): + events.append(event) + + # Assert + tool_events = [e for e in events if e.type == "tool_use"] + assert len(tool_events) >= 1, "Expected tool use with default auto_approve=True" + + async def 
test_execute_completes_with_auto_approve_disabled(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + response = await shell.execute( + cwd="/tmp", + prompt="Respond with exactly: no tools needed", + allowed_tools=[], + ) + + # Assert + assert isinstance(response, AgentResponse) + assert len(response.response) > 0, "Expected non-empty response" + + +class TestExecuteE2E: + async def test_execute_returns_response_with_text(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + response = await shell.execute( + cwd="/tmp", + prompt="Respond with exactly: hello world", + allowed_tools=[], + ) + + # Assert + assert isinstance(response, AgentResponse) + assert len(response.response) > 0, "Expected non-empty response text" + assert response.cost == 0.0, "Expected cost to be 0.0 (Copilot has no pricing)" + + async def test_execute_with_effort(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + response = await shell.execute( + cwd="/tmp", + prompt="Respond with exactly: hello world", + allowed_tools=[], + effort="high", + ) + + # Assert + assert isinstance(response, AgentResponse) + assert len(response.response) > 0, "Expected non-empty response text" + + +class TestSessionE2E: + async def test_stream_returns_session_id(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + events: list[StreamEvent] = [] + async for event in shell.stream( + cwd="/tmp", + prompt="Respond with exactly: hello", + allowed_tools=[], + ): + events.append(event) + + # Assert + session_events = [e for e in events if e.session_id] + assert len(session_events) >= 1, "Expected at least one event with session_id" + assert isinstance(session_events[0].session_id, str) + assert len(session_events[0].session_id) > 0 + + async def test_execute_returns_session_id(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act + response = await 
shell.execute( + cwd="/tmp", + prompt="Respond with exactly: hello", + allowed_tools=[], + ) + + # Assert + assert isinstance(response, AgentResponse) + assert response.session_id is not None + assert len(response.session_id) > 0 + + async def test_resume_session_with_session_id(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Act - first call to get a session_id + first_response = await shell.execute( + cwd="/tmp", + prompt="Remember the word 'banana'", + allowed_tools=[], + ) + + # Act - resume with session_id + second_response = await shell.execute( + cwd="/tmp", + prompt="What word did I ask you to remember?", + allowed_tools=[], + session_id=first_response.session_id, + ) + + # Assert + assert isinstance(second_response, AgentResponse) + assert len(second_response.response) > 0 diff --git a/tests/integration/test_copilot_cli_integration.py b/tests/integration/test_copilot_cli_integration.py new file mode 100644 index 0000000..9b36cf7 --- /dev/null +++ b/tests/integration/test_copilot_cli_integration.py @@ -0,0 +1,369 @@ +import json +from unittest.mock import AsyncMock, MagicMock, patch + +from agent_shell.shell import AgentShell +from agent_shell.models.agent import AgentType, AgentResponse, StreamEvent + +from tests.unit.copilot_fixtures import ( + TURN_START_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_DELTA_EVENT_2, + MESSAGE_DELTA_EVENT_3, + REASONING_DELTA_EVENT, + MESSAGE_EVENT_NO_TOOLS, + MESSAGE_EVENT_WITH_TOOLS, + RESULT_EVENT_SUCCESS, +) + + +def _make_mock_process(ndjson_lines: list[dict], returncode: int = 0, stderr: bytes = b""): + """Create a mock subprocess that yields NDJSON lines from stdout.""" + encoded = "\n".join(json.dumps(line) for line in ndjson_lines) + "\n" + chunks = [encoded.encode("utf-8"), b""] + + process = AsyncMock() + process.stdout = MagicMock() + process.stdout.read = AsyncMock(side_effect=chunks) + process.stderr = MagicMock() + process.stderr.read = AsyncMock(return_value=stderr) + 
process.returncode = returncode + process.wait = AsyncMock() + process.pid = 12345 + return process + + +class TestStreamIntegration: + async def test_stream_yields_text_and_result_events(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [ + TURN_START_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, + ] + mock_process = _make_mock_process(ndjson) + + # Act + events: list[StreamEvent] = [] + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + async for event in shell.stream( + cwd="/tmp", + prompt="Respond with exactly: hello world", + allowed_tools=[], + ): + events.append(event) + + # Assert + text_events = [e for e in events if e.type == "text"] + result_events = [e for e in events if e.type == "result"] + + assert len(text_events) >= 1, "Expected at least one text event" + assert len(result_events) == 1, "Expected exactly one result event" + + async def test_stream_with_thinking_yields_thinking_events(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [ + TURN_START_EVENT, + REASONING_DELTA_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, + ] + mock_process = _make_mock_process(ndjson) + + # Act + events: list[StreamEvent] = [] + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + async for event in shell.stream( + cwd="/tmp", + prompt="Respond with exactly: hello world", + allowed_tools=[], + effort="high", + include_thinking=True, + ): + events.append(event) + + # Assert + thinking_events = [e for e in events if e.type == "thinking"] + assert len(thinking_events) >= 1, "Expected at least one thinking event" + + async def test_stream_with_tool_use(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [ + TURN_START_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_EVENT_WITH_TOOLS, + RESULT_EVENT_SUCCESS, + ] + mock_process = _make_mock_process(ndjson) + + # Act + 
events: list[StreamEvent] = [] + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + async for event in shell.stream( + cwd="/tmp", + prompt="Use bash to list files", + allowed_tools=["Bash"], + ): + events.append(event) + + # Assert + tool_events = [e for e in events if e.type == "tool_use"] + assert len(tool_events) >= 1, "Expected at least one tool_use event" + assert tool_events[0].content == "report_intent" + assert tool_events[1].content == "bash" + + +class TestAutoApproveIntegration: + async def test_stream_includes_allow_all_tools_by_default(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream(cwd="/tmp", prompt="test"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--allow-all-tools" in cmd_args + + async def test_stream_omits_allow_all_tools_when_disabled(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream(cwd="/tmp", prompt="test", auto_approve=False): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--allow-all-tools" not in cmd_args + + async def test_execute_includes_allow_all_tools_by_default(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + await shell.execute(cwd="/tmp", prompt="test") + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--allow-all-tools" in cmd_args + + 
+class TestCommandConstructionIntegration: + async def test_base_command_includes_copilot_and_flags(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream(cwd="/tmp", prompt="test"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert cmd_args[0] == "copilot" + assert cmd_args[1] == "-p" + assert "--output-format" in cmd_args + assert cmd_args[cmd_args.index("--output-format") + 1] == "json" + assert "--silent" in cmd_args + + async def test_includes_model_flag(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream( + cwd="/tmp", + prompt="test", + model="gpt-4o", + ): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--model" in cmd_args + assert cmd_args[cmd_args.index("--model") + 1] == "gpt-4o" + + async def test_includes_effort_flag(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream( + cwd="/tmp", + prompt="test", + effort="high", + ): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--effort" in cmd_args + assert cmd_args[cmd_args.index("--effort") + 1] == "high" + + async def test_prompt_is_second_argument(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act 
+ with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream(cwd="/tmp", prompt="say hello"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert cmd_args[2] == "say hello" + + async def test_stream_includes_reasoning_flag_when_include_thinking(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream( + cwd="/tmp", + prompt="test", + include_thinking=True, + ): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--enable-reasoning-summaries" in cmd_args + + async def test_stream_omits_reasoning_flag_when_not_include_thinking(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream( + cwd="/tmp", + prompt="test", + include_thinking=False, + ): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--enable-reasoning-summaries" not in cmd_args + + +class TestExecuteIntegration: + async def test_execute_returns_response_with_text(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [ + TURN_START_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, + ] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + response = await shell.execute( + cwd="/tmp", + prompt="Respond with exactly: hello world", + allowed_tools=[], + ) + + # Assert + assert isinstance(response, AgentResponse) + assert len(response.response) > 0, "Expected non-empty response text" + assert 
response.cost == 0.0, "Expected cost to be 0.0 (Copilot has no pricing)" + + +class TestSessionIntegration: + async def test_stream_captures_session_id(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, MESSAGE_EVENT_NO_TOOLS, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + events: list[StreamEvent] = [] + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + async for event in shell.stream(cwd="/tmp", prompt="test"): + events.append(event) + + # Assert + session_events = [e for e in events if e.session_id] + assert len(session_events) >= 1, "Expected at least one event with session_id" + assert session_events[0].session_id == "01036873-9931-4e3e-b3cb-14793ae370f9" + + async def test_stream_passes_resume_flag_when_session_id_provided(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream( + cwd="/tmp", prompt="test", session_id="abc-123" + ): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--resume" in cmd_args + assert cmd_args[cmd_args.index("--resume") + 1] == "abc-123" + + async def test_stream_omits_resume_flag_when_no_session_id(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in shell.stream(cwd="/tmp", prompt="test"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--resume" not in cmd_args + + async def test_execute_returns_session_id(self): + # Arrange + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + ndjson = [MESSAGE_DELTA_EVENT, 
MESSAGE_EVENT_NO_TOOLS, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + response = await shell.execute(cwd="/tmp", prompt="test") + + # Assert + assert isinstance(response, AgentResponse) + assert response.session_id == "01036873-9931-4e3e-b3cb-14793ae370f9" diff --git a/tests/unit/copilot_fixtures.py b/tests/unit/copilot_fixtures.py new file mode 100644 index 0000000..ee4720b --- /dev/null +++ b/tests/unit/copilot_fixtures.py @@ -0,0 +1,280 @@ +"""NDJSON event fixtures captured from a real Copilot CLI --output-format=json session.""" + +MCP_SERVER_STATUS_EVENT = { + "type": "session.mcp_server_status_changed", + "data": {"serverName": "forgetful_local", "status": "connected"}, + "id": "954b2564-d2a4-4bbe-97b6-08680ef76a2f", + "timestamp": "2026-04-18T23:14:47.199Z", + "parentId": "ac064ea6-4c7f-4fe0-bc76-37fcb36f683b", + "ephemeral": True, +} + +MCP_SERVERS_LOADED_EVENT = { + "type": "session.mcp_servers_loaded", + "data": { + "servers": [ + {"name": "forgetful_local", "status": "connected"}, + {"name": "github-mcp-server", "status": "connected", "source": "builtin"}, + ] + }, + "id": "1470066d-9a3d-4a9f-a954-4ece6106eaec", + "timestamp": "2026-04-18T23:14:49.019Z", + "parentId": "e8ad02ab-07ac-4074-934a-7aafdb0ee412", + "ephemeral": True, +} + +TOOLS_UPDATED_EVENT = { + "type": "session.tools_updated", + "data": {"model": "gpt-5.4"}, + "id": "8aeef1f8-a01f-4e96-a0b2-f7a288aefcc3", + "timestamp": "2026-04-18T23:14:49.955Z", + "parentId": "7c18d071-ce76-4ecc-a760-861bdbdf82c8", + "ephemeral": True, +} + +USER_MESSAGE_EVENT = { + "type": "user.message", + "data": { + "content": "Return exactly: HELLO_WORLD", + "transformedContent": "2026-04-18T23:14:49.957Z\n\nReturn exactly: HELLO_WORLD\n\n\nNo tables currently exist. 
Default tables (todos, todo_deps) will be created automatically when you first use the SQL tool.\n", + "attachments": [], + "interactionId": "798f95a9-bdad-44c9-96e7-ebe5c6e36ff0", + }, + "id": "f6921d35-e136-4ff1-a8a6-0178fb93fd90", + "timestamp": "2026-04-18T23:14:49.957Z", + "parentId": "8aeef1f8-a01f-4e96-a0b2-f7a288aefcc3", +} + +TURN_START_EVENT = { + "type": "assistant.turn_start", + "data": { + "turnId": "0", + "interactionId": "798f95a9-bdad-44c9-96e7-ebe5c6e36ff0", + }, + "id": "2038efff-d58d-4c68-bf57-77fa2eaa9d55", + "timestamp": "2026-04-18T23:14:49.959Z", + "parentId": "f6921d35-e136-4ff1-a8a6-0178fb93fd90", +} + +REASONING_DELTA_EVENT = { + "type": "assistant.reasoning_delta", + "data": { + "reasoningId": "a835bc4b-be16-4755-8716-11832e3a24bf", + "deltaContent": "The user wants me to return exactly 'HELLO_WORLD'.", + }, + "id": "17a22f93-76d5-426a-8401-fc5864fd6462", + "timestamp": "2026-04-18T23:15:26.706Z", + "parentId": "f3711656-8e19-4a08-afab-065438f180c5", + "ephemeral": True, +} + +REASONING_EVENT = { + "type": "assistant.reasoning", + "data": { + "reasoningId": "a835bc4b-be16-4755-8716-11832e3a24bf", + "content": "**Planning file listing task**\n\nI need to list files in the current directory.", + }, + "id": "a86297f1-8be2-4660-9975-e543e40c3bb9", + "timestamp": "2026-04-18T23:15:27.925Z", + "parentId": "c87e534d-0729-4c73-849f-e82b06f4623a", + "ephemeral": True, +} + +MESSAGE_DELTA_EVENT = { + "type": "assistant.message_delta", + "data": { + "messageId": "34f2015a-70d0-426c-aa7d-96e81fba07f4", + "deltaContent": "HEL", + }, + "id": "aa2a7e35-6215-42c3-9938-e27eca6ca547", + "timestamp": "2026-04-18T23:14:51.091Z", + "parentId": "ec30f7a2-dc1d-4ee9-a2ef-b035a0129140", + "ephemeral": True, +} + +MESSAGE_DELTA_EVENT_2 = { + "type": "assistant.message_delta", + "data": { + "messageId": "34f2015a-70d0-426c-aa7d-96e81fba07f4", + "deltaContent": "LO", + }, + "id": "18f66722-7856-422f-9d4e-260440adfc99", + "timestamp": "2026-04-18T23:14:51.091Z", + 
"parentId": "d272213c-2946-4bca-aad4-d16c1e4c62b9", + "ephemeral": True, +} + +MESSAGE_DELTA_EVENT_3 = { + "type": "assistant.message_delta", + "data": { + "messageId": "34f2015a-70d0-426c-aa7d-96e81fba07f4", + "deltaContent": "_WORLD", + }, + "id": "5736be52-8dbb-4796-b0ab-ccb84c22ed32", + "timestamp": "2026-04-18T23:14:51.091Z", + "parentId": "3fbdfcc3-f177-4dbf-ac8f-0e002b2e44d1", + "ephemeral": True, +} + +MESSAGE_EVENT_NO_TOOLS = { + "type": "assistant.message", + "data": { + "messageId": "34f2015a-70d0-426c-aa7d-96e81fba07f4", + "content": "HELLO_WORLD", + "toolRequests": [], + "interactionId": "798f95a9-bdad-44c9-96e7-ebe5c6e36ff0", + "phase": "final_answer", + "outputTokens": 35, + }, + "id": "39f684a7-36b6-4377-93cc-07150b543291", + "timestamp": "2026-04-18T23:14:51.604Z", + "parentId": "88f85973-8dfd-447a-8010-dba9bbaedcac", +} + +MESSAGE_EVENT_WITH_TOOLS = { + "type": "assistant.message", + "data": { + "messageId": "76574897-43de-41df-bf91-ac08a1097d06", + "content": "I'll grab the first three regular files and save them.", + "toolRequests": [ + { + "toolCallId": "call_V5dpqC28CIgUApSB409sp3gf", + "name": "report_intent", + "arguments": {"intent": "Writing file list"}, + "type": "function", + }, + { + "toolCallId": "call_sKHBE2LZ7EyG5SXpBVCdlbws", + "name": "bash", + "arguments": { + "command": "cd /tmp && find . 
-maxdepth 1 -type f -printf '%f\\n' | sort | head -n 3", + "description": "List first 3 files and save them", + "mode": "sync", + "initial_wait": 30, + }, + "type": "function", + "intentionSummary": "List first 3 files and save them", + }, + ], + "interactionId": "083460d5-14bb-4cc0-b06c-2378a14bffbf", + "phase": "commentary", + "outputTokens": 460, + }, + "id": "c87e534d-0729-4c73-849f-e82b06f4623a", + "timestamp": "2026-04-18T23:15:27.924Z", + "parentId": "d685fba7-7f52-47cf-8ad8-a513661328ae", +} + +TOOL_EXEC_START_EVENT = { + "type": "tool.execution_start", + "data": { + "toolCallId": "call_sKHBE2LZ7EyG5SXpBVCdlbws", + "toolName": "bash", + "arguments": { + "command": "cd /tmp && find . -maxdepth 1 -type f -printf '%f\\n' | sort | head -n 3 | tee /tmp/filelist.txt", + "description": "List first 3 files and save them", + "mode": "sync", + "initial_wait": 30, + }, + }, + "id": "8ccbfa42-a65e-46ab-8e5e-6963b9190223", + "timestamp": "2026-04-18T23:15:27.925Z", + "parentId": "421227e5-119f-4261-900f-9003ee168082", +} + +TOOL_EXEC_COMPLETE_EVENT = { + "type": "tool.execution_complete", + "data": { + "toolCallId": "call_sKHBE2LZ7EyG5SXpBVCdlbws", + "model": "gpt-5.4", + "interactionId": "083460d5-14bb-4cc0-b06c-2378a14bffbf", + "success": True, + "result": { + "content": ".5ff77f977c5d5335-00000000.so\n", + "detailedContent": ".5ff77f977c5d5335-00000000.so\n", + }, + "toolTelemetry": { + "properties": {"customTimeout": "true", "executionMode": "sync"}, + "metrics": {"commandTimeout": 30000}, + }, + }, + "id": "fe967dc5-486e-4a5d-8ed6-746c6899d9f7", + "timestamp": "2026-04-18T23:15:28.467Z", + "parentId": "56b75960-857e-4bfd-b87c-b9104104d90a", +} + +TURN_END_EVENT = { + "type": "assistant.turn_end", + "data": {"turnId": "0"}, + "id": "46cfed53-cda6-41a6-9258-97b1041680fc", + "timestamp": "2026-04-18T23:14:51.604Z", + "parentId": "39f684a7-36b6-4377-93cc-07150b543291", +} + +RESULT_EVENT_SUCCESS = { + "type": "result", + "timestamp": "2026-04-18T23:14:51.605Z", + 
"sessionId": "01036873-9931-4e3e-b3cb-14793ae370f9", + "exitCode": 0, + "usage": { + "premiumRequests": 1, + "totalApiDurationMs": 1138, + "sessionDurationMs": 4454, + "codeChanges": { + "linesAdded": 0, + "linesRemoved": 0, + "filesModified": [], + }, + }, +} + +RESULT_EVENT_SUCCESS_WITH_USAGE = { + "type": "result", + "timestamp": "2026-04-18T23:15:31.773Z", + "sessionId": "e9a5f1b2-52cd-4966-9cf3-826579fc5020", + "exitCode": 0, + "usage": { + "premiumRequests": 1, + "totalApiDurationMs": 14397, + "sessionDurationMs": 14779, + "codeChanges": { + "linesAdded": 0, + "linesRemoved": 0, + "filesModified": [], + }, + }, +} + +RESULT_EVENT_ERROR = { + "type": "result", + "timestamp": "2026-04-18T23:14:51.605Z", + "sessionId": "01036873-9931-4e3e-b3cb-14793ae370f9", + "exitCode": 1, + "usage": { + "premiumRequests": 1, + "totalApiDurationMs": 5000, + "sessionDurationMs": 5000, + "codeChanges": { + "linesAdded": 0, + "linesRemoved": 0, + "filesModified": [], + }, + }, +} + +BACKGROUND_TASKS_CHANGED_EVENT = { + "type": "session.background_tasks_changed", + "data": {}, + "id": "c8ecb320-7cce-4dca-a1c8-c891a81a6f0f", + "timestamp": "2026-04-18T23:15:28.162Z", + "parentId": "474e8bfc-7d20-4796-8fde-811c3d7d7aa9", + "ephemeral": True, +} + +UNKNOWN_EVENT = { + "type": "session.unknown_internal_event", + "data": {"foo": "bar"}, + "id": "unknown-event-id", + "timestamp": "2026-04-18T23:14:47.000Z", + "ephemeral": True, +} diff --git a/tests/unit/test_copilot_cli_cancel.py b/tests/unit/test_copilot_cli_cancel.py new file mode 100644 index 0000000..f87f393 --- /dev/null +++ b/tests/unit/test_copilot_cli_cancel.py @@ -0,0 +1,22 @@ +from unittest.mock import patch, AsyncMock + +from agent_shell.adapters.copilot_cli_adapter import CopilotCLIAdapter + + +class TestCancel: + async def test_kills_active_processes_and_clears_list(self): + # Arrange + adapter = CopilotCLIAdapter() + mock_process = AsyncMock() + mock_process.pid = 12345 + adapter._active_processes = [mock_process] + + # 
Act + with patch("os.getpgid", return_value=12345) as mock_getpgid, \ + patch("os.killpg") as mock_killpg: + await adapter.cancel() + + # Assert + mock_getpgid.assert_called_once_with(12345) + mock_killpg.assert_called_once_with(12345, 9) + assert len(adapter._active_processes) == 0 diff --git a/tests/unit/test_copilot_cli_execute.py b/tests/unit/test_copilot_cli_execute.py new file mode 100644 index 0000000..704f37a --- /dev/null +++ b/tests/unit/test_copilot_cli_execute.py @@ -0,0 +1,92 @@ +import asyncio +import json +from unittest.mock import AsyncMock, patch, MagicMock + +from agent_shell.adapters.copilot_cli_adapter import CopilotCLIAdapter +from agent_shell.models.agent import AgentResponse, StreamEvent + +from tests.unit.copilot_fixtures import ( + TURN_START_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_DELTA_EVENT_2, + MESSAGE_DELTA_EVENT_3, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, +) + + +def _make_mock_process(ndjson_lines: list[dict], returncode: int = 0, stderr: bytes = b""): + """Create a mock subprocess that yields NDJSON lines from stdout.""" + encoded = "\n".join(json.dumps(line) for line in ndjson_lines) + "\n" + chunks = [encoded.encode("utf-8"), b""] + + process = AsyncMock() + process.stdout = MagicMock() + process.stdout.read = AsyncMock(side_effect=chunks) + process.stderr = MagicMock() + process.stderr.read = AsyncMock(return_value=stderr) + process.returncode = returncode + process.wait = AsyncMock() + process.pid = 12345 + return process + + +class TestExecute: + async def test_returns_response_with_text_and_session_id(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [ + TURN_START_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_DELTA_EVENT_2, + MESSAGE_DELTA_EVENT_3, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, + ] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + response = await adapter.execute(cwd="/tmp", prompt="test") + + # Assert + assert 
isinstance(response, AgentResponse) + assert response.response == "HEL\nLO\n_WORLD" + assert response.session_id == "01036873-9931-4e3e-b3cb-14793ae370f9" + + async def test_returns_response_with_empty_cost(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [ + MESSAGE_DELTA_EVENT, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, + ] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + response = await adapter.execute(cwd="/tmp", prompt="test") + + # Assert + assert isinstance(response, AgentResponse) + assert response.cost == 0.0 + + async def test_aggregates_text_from_multiple_deltas(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [ + MESSAGE_DELTA_EVENT, + MESSAGE_DELTA_EVENT_2, + MESSAGE_DELTA_EVENT_3, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, + ] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + response = await adapter.execute(cwd="/tmp", prompt="test") + + # Assert + assert response.response == "HEL\nLO\n_WORLD" diff --git a/tests/unit/test_copilot_cli_parse_event.py b/tests/unit/test_copilot_cli_parse_event.py new file mode 100644 index 0000000..11cdb9a --- /dev/null +++ b/tests/unit/test_copilot_cli_parse_event.py @@ -0,0 +1,339 @@ +from agent_shell.adapters.copilot_cli_adapter import CopilotCLIAdapter +from agent_shell.models.agent import StreamEvent + +from tests.unit.copilot_fixtures import ( + MCP_SERVER_STATUS_EVENT, + MCP_SERVERS_LOADED_EVENT, + TOOLS_UPDATED_EVENT, + BACKGROUND_TASKS_CHANGED_EVENT, + USER_MESSAGE_EVENT, + TURN_START_EVENT, + REASONING_DELTA_EVENT, + REASONING_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_EVENT_NO_TOOLS, + MESSAGE_EVENT_WITH_TOOLS, + TOOL_EXEC_START_EVENT, + TOOL_EXEC_COMPLETE_EVENT, + TURN_END_EVENT, + RESULT_EVENT_SUCCESS, + RESULT_EVENT_ERROR, + UNKNOWN_EVENT, +) + + +class TestParseEventSessionEvents: + def 
test_ignores_mcp_server_status_changed(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(MCP_SERVER_STATUS_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + def test_ignores_mcp_servers_loaded(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(MCP_SERVERS_LOADED_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + def test_ignores_tools_updated(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(TOOLS_UPDATED_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + def test_ignores_background_tasks_changed(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(BACKGROUND_TASKS_CHANGED_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + +class TestParseEventUserMessage: + def test_ignores_user_message_event(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(USER_MESSAGE_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + +class TestParseEventTurnStart: + def test_emits_system_event(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(TURN_START_EVENT, include_thinking=False) + + # Assert + assert len(events) == 1 + assert events[0].type == "system" + assert events[0].content == "" + assert events[0].session_id is None + + +class TestParseEventReasoning: + def test_includes_reasoning_delta_when_flag_true(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_DELTA_EVENT, include_thinking=True) + + # Assert + assert len(events) == 1 + assert events[0].type == "thinking" + assert events[0].content == "The user wants me to return exactly 'HELLO_WORLD'." 
+ + def test_excludes_reasoning_delta_when_flag_false(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_DELTA_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + def test_includes_reasoning_block_when_flag_true(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_EVENT, include_thinking=True) + + # Assert + assert len(events) == 1 + assert events[0].type == "thinking" + assert "**Planning file listing task**" in events[0].content + + def test_excludes_reasoning_block_when_flag_false(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + +class TestParseEventMessageDelta: + def test_emits_text_from_delta(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(MESSAGE_DELTA_EVENT, include_thinking=False) + + # Assert + assert len(events) == 1 + assert events[0].type == "text" + assert events[0].content == "HEL" + + +class TestParseEventMessageNoTools: + def test_ignores_message_without_tool_requests(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(MESSAGE_EVENT_NO_TOOLS, include_thinking=False) + + # Assert + assert len(events) == 0 + + +class TestParseEventMessageWithTools: + def test_emits_tool_use_for_each_request(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(MESSAGE_EVENT_WITH_TOOLS, include_thinking=False) + + # Assert + assert len(events) == 2 + assert events[0].type == "tool_use" + assert events[0].content == "report_intent" + assert events[1].type == "tool_use" + assert events[1].content == "bash" + + +class TestParseEventToolExecution: + def test_ignores_tool_execution_start(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(TOOL_EXEC_START_EVENT, 
include_thinking=False) + + # Assert + assert len(events) == 0 + + def test_ignores_tool_execution_complete(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(TOOL_EXEC_COMPLETE_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + +class TestParseEventTurnEnd: + def test_ignores_turn_end(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(TURN_END_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + +class TestParseEventResult: + def test_parses_successful_result(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(RESULT_EVENT_SUCCESS, include_thinking=False) + + # Assert + assert len(events) == 1 + assert events[0].type == "result" + assert events[0].content == "ok" + assert events[0].cost == 0.0 + assert events[0].duration == 1.138 + assert events[0].session_id == "01036873-9931-4e3e-b3cb-14793ae370f9" + + def test_parses_error_result(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(RESULT_EVENT_ERROR, include_thinking=False) + + # Assert + assert len(events) == 1 + assert events[0].type == "result" + assert events[0].content == "error" + assert events[0].cost == 0.0 + assert events[0].duration == 5.0 + assert events[0].session_id == "01036873-9931-4e3e-b3cb-14793ae370f9" + + def test_result_without_usage_defaults_duration_to_zero(self): + # Arrange + adapter = CopilotCLIAdapter() + event = { + "type": "result", + "timestamp": "2026-04-18T23:14:51.605Z", + "sessionId": "test-session", + "exitCode": 0, + "usage": {}, + } + + # Act + events = adapter._parse_event(event, include_thinking=False) + + # Assert + assert len(events) == 1 + assert events[0].duration == 0.0 + assert events[0].session_id == "test-session" + + def test_result_without_session_id(self): + # Arrange + adapter = CopilotCLIAdapter() + event = { + "type": "result", + "timestamp": 
"2026-04-18T23:14:51.605Z", + "exitCode": 0, + "usage": {}, + } + + # Act + events = adapter._parse_event(event, include_thinking=False) + + # Assert + assert len(events) == 1 + assert events[0].session_id is None + + +class TestParseEventUnknown: + def test_ignores_unknown_event(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(UNKNOWN_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + +class TestParseEventReasoningDisabled: + def test_reasoning_delta_ignored_when_include_thinking_false(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_DELTA_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + def test_reasoning_block_ignored_when_include_thinking_false(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_EVENT, include_thinking=False) + + # Assert + assert len(events) == 0 + + def test_reasoning_delta_emitted_when_include_thinking_true(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_DELTA_EVENT, include_thinking=True) + + # Assert + assert len(events) == 1 + assert events[0].type == "thinking" + assert "HELLO_WORLD" in events[0].content + + def test_reasoning_block_emitted_when_include_thinking_true(self): + # Arrange + adapter = CopilotCLIAdapter() + + # Act + events = adapter._parse_event(REASONING_EVENT, include_thinking=True) + + # Assert + assert len(events) == 1 + assert events[0].type == "thinking" + assert "Planning" in events[0].content diff --git a/tests/unit/test_copilot_cli_stream.py b/tests/unit/test_copilot_cli_stream.py new file mode 100644 index 0000000..da750f5 --- /dev/null +++ b/tests/unit/test_copilot_cli_stream.py @@ -0,0 +1,272 @@ +import asyncio +import json +from unittest.mock import AsyncMock, patch, MagicMock + +from agent_shell.adapters.copilot_cli_adapter import CopilotCLIAdapter +from 
agent_shell.models.agent import StreamEvent + +from tests.unit.copilot_fixtures import ( + TURN_START_EVENT, + MESSAGE_DELTA_EVENT, + MESSAGE_EVENT_NO_TOOLS, + RESULT_EVENT_SUCCESS, +) + + +def _make_mock_process(ndjson_lines: list[dict], returncode: int = 0, stderr: bytes = b""): + """Create a mock subprocess that yields NDJSON lines from stdout.""" + encoded = "\n".join(json.dumps(line) for line in ndjson_lines) + "\n" + chunks = [encoded.encode("utf-8"), b""] + + process = AsyncMock() + process.stdout = MagicMock() + process.stdout.read = AsyncMock(side_effect=chunks) + process.stderr = MagicMock() + process.stderr.read = AsyncMock(return_value=stderr) + process.returncode = returncode + process.wait = AsyncMock() + process.pid = 12345 + return process + + +class TestStream: + async def test_yields_events_in_order(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [TURN_START_EVENT, MESSAGE_DELTA_EVENT, MESSAGE_EVENT_NO_TOOLS, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + events: list[StreamEvent] = [] + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + async for event in adapter.stream(cwd="/tmp", prompt="test"): + events.append(event) + + # Assert + assert len(events) == 3 + assert events[0].type == "system" + assert events[1].type == "text" + assert events[1].content == "HEL" + assert events[2].type == "result" + + async def test_yields_error_event_on_nonzero_exit_with_stderr(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT] + mock_process = _make_mock_process(ndjson, returncode=1, stderr=b"something went wrong") + + # Act + events: list[StreamEvent] = [] + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + async for event in adapter.stream(cwd="/tmp", prompt="test"): + events.append(event) + + # Assert + assert events[-1].type == "error" + assert "something went wrong" in events[-1].content + + async def 
test_includes_model_flag_in_command(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test", model="gpt-4o"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--model" in cmd_args + assert cmd_args[cmd_args.index("--model") + 1] == "gpt-4o" + + async def test_omits_model_flag_when_none(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--model" not in cmd_args + + async def test_includes_effort_flag_in_command(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test", effort="high"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--effort" in cmd_args + assert cmd_args[cmd_args.index("--effort") + 1] == "high" + + async def test_omits_effort_flag_when_none(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--effort" not in cmd_args + + async def test_includes_allow_all_tools_by_default(self): + # Arrange + 
adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test", auto_approve=True): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--allow-all-tools" in cmd_args + + async def test_omits_allow_all_tools_when_disabled(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test", auto_approve=False): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--allow-all-tools" not in cmd_args + + async def test_includes_allow_tool_for_each_allowed_tool(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream( + cwd="/tmp", prompt="test", allowed_tools=["Bash", "Read"] + ): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + allow_tool_indices = [i for i, x in enumerate(cmd_args) if x == "--allow-tool"] + assert len(allow_tool_indices) == 2 + assert cmd_args[allow_tool_indices[0] + 1] == "Bash" + assert cmd_args[allow_tool_indices[1] + 1] == "Read" + + async def test_omits_allow_tool_when_none(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test", allowed_tools=None): + pass + + # Assert + cmd_args = 
mock_exec.call_args[0] + assert "--allow-tool" not in cmd_args + + async def test_includes_resume_flag_when_session_id_provided(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream( + cwd="/tmp", prompt="test", session_id="ses_abc123" + ): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--resume" in cmd_args + assert cmd_args[cmd_args.index("--resume") + 1] == "ses_abc123" + + async def test_omits_resume_flag_when_no_session_id(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="test"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert "--resume" not in cmd_args + + async def test_skips_malformed_json_lines(self): + # Arrange + adapter = CopilotCLIAdapter() + raw = json.dumps(MESSAGE_DELTA_EVENT) + "\n" + "not valid json\n" + json.dumps(RESULT_EVENT_SUCCESS) + "\n" + chunks = [raw.encode("utf-8"), b""] + + mock_process = AsyncMock() + mock_process.stdout = MagicMock() + mock_process.stdout.read = AsyncMock(side_effect=chunks) + mock_process.stderr = MagicMock() + mock_process.stderr.read = AsyncMock(return_value=b"") + mock_process.returncode = 0 + mock_process.wait = AsyncMock() + mock_process.pid = 12345 + + # Act + events: list[StreamEvent] = [] + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + async for event in adapter.stream(cwd="/tmp", prompt="test"): + events.append(event) + + # Assert + assert len(events) == 2 + assert events[0].type == "text" + assert events[1].type == "result" + + async def 
test_base_command_is_copilot_with_flags(self): + # Arrange + adapter = CopilotCLIAdapter() + ndjson = [MESSAGE_DELTA_EVENT, RESULT_EVENT_SUCCESS] + mock_process = _make_mock_process(ndjson) + + # Act + with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec: + async for _ in adapter.stream(cwd="/tmp", prompt="say hello"): + pass + + # Assert + cmd_args = mock_exec.call_args[0] + assert cmd_args[0] == "copilot" + assert cmd_args[1] == "-p" + assert cmd_args[2] == "say hello" + assert "--output-format" in cmd_args + assert cmd_args[cmd_args.index("--output-format") + 1] == "json" + assert "--silent" in cmd_args diff --git a/tests/unit/test_shell.py b/tests/unit/test_shell.py index 0874034..f59bc9c 100644 --- a/tests/unit/test_shell.py +++ b/tests/unit/test_shell.py @@ -4,6 +4,7 @@ from agent_shell.models.agent import AgentType from agent_shell.adapters.claude_code_adapter import ClaudeCodeAdapter from agent_shell.adapters.opencode_adapter import OpenCodeAdapter +from agent_shell.adapters.copilot_cli_adapter import CopilotCLIAdapter class TestResolveAdapter: @@ -21,10 +22,17 @@ def test_resolves_opencode(self): # Assert assert isinstance(shell._adapter, OpenCodeAdapter) + def test_resolves_copilot_cli(self): + # Arrange / Act + shell = AgentShell(agent_type=AgentType.COPILOT_CLI) + + # Assert + assert isinstance(shell._adapter, CopilotCLIAdapter) + def test_raises_for_unsupported_agent(self): # Arrange / Act / Assert with pytest.raises(ValueError, match="Unsupported agent"): - AgentShell(agent_type=AgentType.GEMINI_CLI) + AgentShell(agent_type=AgentType.CODEX) class TestCwdValidation: From 061632bf035265b728180450ba6e4f39f3262b36 Mon Sep 17 00:00:00 2001 From: Scott Raisbeck Date: Tue, 21 Apr 2026 00:08:43 +0100 Subject: [PATCH 2/2] Address PR review: drop turn_start system event, add duration to AgentResponse, assert unregister_process_group --- src/agent_shell/adapters/claude_code_adapter.py | 3 ++- 
src/agent_shell/adapters/copilot_cli_adapter.py | 8 +++----- src/agent_shell/adapters/opencode_adapter.py | 3 ++- src/agent_shell/models/agent.py | 1 + tests/unit/test_copilot_cli_cancel.py | 7 +++++-- tests/unit/test_copilot_cli_parse_event.py | 7 ++----- tests/unit/test_copilot_cli_stream.py | 12 +++++------- 7 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/agent_shell/adapters/claude_code_adapter.py b/src/agent_shell/adapters/claude_code_adapter.py index ae93238..cd45744 100644 --- a/src/agent_shell/adapters/claude_code_adapter.py +++ b/src/agent_shell/adapters/claude_code_adapter.py @@ -39,8 +39,9 @@ async def execute( text = "\n".join(e.content for e in chunks if e.type == "text") cost = next((e.cost for e in reversed(chunks) if e.type == "result"), 0.0) + duration = next((e.duration for e in reversed(chunks) if e.type == "result"), 0.0) returned_session_id = next((e.session_id for e in chunks if e.session_id), None) - return AgentResponse(response=text, cost=cost, session_id=returned_session_id) + return AgentResponse(response=text, cost=cost, session_id=returned_session_id, duration=duration) async def stream( self, diff --git a/src/agent_shell/adapters/copilot_cli_adapter.py b/src/agent_shell/adapters/copilot_cli_adapter.py index 2108d10..1564cd7 100644 --- a/src/agent_shell/adapters/copilot_cli_adapter.py +++ b/src/agent_shell/adapters/copilot_cli_adapter.py @@ -40,8 +40,9 @@ async def execute( text = "\n".join(e.content for e in chunks if e.type == "text") cost = next((e.cost for e in reversed(chunks) if e.type == "result"), 0.0) + duration = next((e.duration for e in reversed(chunks) if e.type == "result"), 0.0) returned_session_id = next((e.session_id for e in chunks if e.session_id), None) - return AgentResponse(response=text, cost=cost, session_id=returned_session_id) + return AgentResponse(response=text, cost=cost, session_id=returned_session_id, duration=duration) async def stream( self, @@ -141,10 +142,7 @@ def _parse_event(self, 
event: dict, include_thinking: bool) -> list[StreamEvent] t = event.get("type", "") events = [] - if t == "assistant.turn_start": - events.append(StreamEvent(type="system", content="")) - - elif t == "assistant.reasoning_delta" and include_thinking: + if t == "assistant.reasoning_delta" and include_thinking: delta_content = event.get("data", {}).get("deltaContent", "") if delta_content: events.append(StreamEvent(type="thinking", content=delta_content)) diff --git a/src/agent_shell/adapters/opencode_adapter.py b/src/agent_shell/adapters/opencode_adapter.py index 88b1ace..1b68b7d 100644 --- a/src/agent_shell/adapters/opencode_adapter.py +++ b/src/agent_shell/adapters/opencode_adapter.py @@ -40,8 +40,9 @@ async def execute( text = "\n".join(e.content for e in chunks if e.type == "text") cost = next((e.cost for e in reversed(chunks) if e.type == "result"), 0.0) + duration = next((e.duration for e in reversed(chunks) if e.type == "result"), 0.0) returned_session_id = next((e.session_id for e in chunks if e.session_id), None) - return AgentResponse(response=text, cost=cost, session_id=returned_session_id) + return AgentResponse(response=text, cost=cost, session_id=returned_session_id, duration=duration) async def stream( self, diff --git a/src/agent_shell/models/agent.py b/src/agent_shell/models/agent.py index cda03e0..4da8933 100644 --- a/src/agent_shell/models/agent.py +++ b/src/agent_shell/models/agent.py @@ -13,6 +13,7 @@ class AgentResponse: response: str cost: float session_id: str | None = None + duration: float = 0.0 @dataclass class StreamEvent: diff --git a/tests/unit/test_copilot_cli_cancel.py b/tests/unit/test_copilot_cli_cancel.py index f87f393..ebd9536 100644 --- a/tests/unit/test_copilot_cli_cancel.py +++ b/tests/unit/test_copilot_cli_cancel.py @@ -1,4 +1,4 @@ -from unittest.mock import patch, AsyncMock +from unittest.mock import patch, AsyncMock, MagicMock from agent_shell.adapters.copilot_cli_adapter import CopilotCLIAdapter @@ -12,11 +12,14 @@ async def 
test_kills_active_processes_and_clears_list(self): adapter._active_processes = [mock_process] # Act + mock_unregister = MagicMock() with patch("os.getpgid", return_value=12345) as mock_getpgid, \ - patch("os.killpg") as mock_killpg: + patch("os.killpg") as mock_killpg, \ + patch("agent_shell.adapters.copilot_cli_adapter.unregister_process_group", mock_unregister): await adapter.cancel() # Assert mock_getpgid.assert_called_once_with(12345) mock_killpg.assert_called_once_with(12345, 9) assert len(adapter._active_processes) == 0 + mock_unregister.assert_called_once_with(12345) diff --git a/tests/unit/test_copilot_cli_parse_event.py b/tests/unit/test_copilot_cli_parse_event.py index 11cdb9a..bf93d8b 100644 --- a/tests/unit/test_copilot_cli_parse_event.py +++ b/tests/unit/test_copilot_cli_parse_event.py @@ -77,7 +77,7 @@ def test_ignores_user_message_event(self): class TestParseEventTurnStart: - def test_emits_system_event(self): + def test_ignores_turn_start_no_event_emitted(self): # Arrange adapter = CopilotCLIAdapter() @@ -85,10 +85,7 @@ def test_emits_system_event(self): events = adapter._parse_event(TURN_START_EVENT, include_thinking=False) # Assert - assert len(events) == 1 - assert events[0].type == "system" - assert events[0].content == "" - assert events[0].session_id is None + assert events == [] class TestParseEventReasoning: diff --git a/tests/unit/test_copilot_cli_stream.py b/tests/unit/test_copilot_cli_stream.py index da750f5..d3625ee 100644 --- a/tests/unit/test_copilot_cli_stream.py +++ b/tests/unit/test_copilot_cli_stream.py @@ -6,7 +6,6 @@ from agent_shell.models.agent import StreamEvent from tests.unit.copilot_fixtures import ( - TURN_START_EVENT, MESSAGE_DELTA_EVENT, MESSAGE_EVENT_NO_TOOLS, RESULT_EVENT_SUCCESS, @@ -33,7 +32,7 @@ class TestStream: async def test_yields_events_in_order(self): # Arrange adapter = CopilotCLIAdapter() - ndjson = [TURN_START_EVENT, MESSAGE_DELTA_EVENT, MESSAGE_EVENT_NO_TOOLS, RESULT_EVENT_SUCCESS] + ndjson = 
[MESSAGE_DELTA_EVENT, MESSAGE_EVENT_NO_TOOLS, RESULT_EVENT_SUCCESS] mock_process = _make_mock_process(ndjson) # Act @@ -43,11 +42,10 @@ async def test_yields_events_in_order(self): events.append(event) # Assert - assert len(events) == 3 - assert events[0].type == "system" - assert events[1].type == "text" - assert events[1].content == "HEL" - assert events[2].type == "result" + assert len(events) == 2 + assert events[0].type == "text" + assert events[0].content == "HEL" + assert events[1].type == "result" async def test_yields_error_event_on_nonzero_exit_with_stderr(self): # Arrange