diff --git a/README.md b/README.md index 4e85561..709c247 100644 --- a/README.md +++ b/README.md @@ -45,17 +45,17 @@ result = agent.invoke({"messages": "Research the latest trends in AI and write a ## Configuration -Constructor options let you change the image, workspace path, command timeout, resource limits, outbound network access, and any extra `docker run` flags: +Constructor options let you change the docker image of the container, shared folder path, command timeout, resource limits, outbound network access, and any extra `docker run` flags: ```python DockerSandbox( image="python:3.12-bookworm", # default image (Debian-based, includes curl, etc.) allow_outbound_traffic=True, # False → no network; True (default) → allow outbound traffic - workspace_dir="/path/to/project", # host dir for agent files; see note below + shared_dir="/path/to/project", # host folder shared with the container; see note below timeout=120, # per-command timeout (seconds) max_output_bytes=100_000, # combined stdout/stderr cap per command - memory="512m", - cpus=1.0, + memory="256m", # default memory limit + cpus=0.5, # default CPU limit pids_limit=128, auto_remove=True, # remove container on close() extra_run_args=["--env", "FOO=bar"], @@ -63,24 +63,15 @@ DockerSandbox( ``` > [!NOTE] -> When `workspace_dir` is omitted, a temporary directory is created under the host temp folder and **removed on `close()`** when the sandbox owns it. Pass an explicit path to keep files after the container stops. +> Pass an explicit `shared_dir` path to keep files after the container stops. When omitted, a temporary directory is created within the host filesystem and **removed when the DockerSandbox is closed**. ## How it works -`DockerSandbox` implements the Deep Agents backend protocol by splitting work across the host and a container: - -- **File tools** (`read`, `write`, `edit`, `grep`, `glob`, `ls`) run against a workspace directory on your machine. 
-- **`execute`** runs shell commands in a long-lived Docker container. The same directory is bind-mounted at `/workspace`, so files stay in sync between tools and commands. - -On startup, the sandbox creates a container with conservative defaults: - -- [`python:3.12-bookworm`](https://hub.docker.com/_/python) as the default image -- Outbound traffic allowed by default -- No elevated Linux privileges -- Read-only root filesystem (with small `tmpfs` mounts for `/tmp` and `/var/tmp`) -- Memory, CPU, and PID limits +Creating a `DockerSandbox` starts a long-running Docker container. Every shell command executed by the agent runs **inside the container**, not on your host OS, so library installations, cURL downloads, and any other filesystem changes outside `/shared` stay inside the container and never touch your host. The only link between the container and your machine is `shared_dir` (a temporary directory when omitted), a host folder that is bind-mounted at `/shared` and used as the shell working directory, so you can share files between the agent and your host. +> [!NOTE] +> Docker does not allow bind-mounting a host folder at `/` (it would hide the image’s system files and break the container). File tools (`read_file`, `write_file`, …) use virtual paths whose root `/` is the shared folder (for example `/sales.csv`); shell commands run in `/shared`, so the same file is `sales.csv` or `/shared/sales.csv` inside the container. > [!NOTE] > The container is stopped and removed automatically when the Python process exits (`atexit`). Use a context manager (below) to tear down earlier. 
@@ -104,14 +95,14 @@ print("Done!") ## Example -The [pizza agent](examples/pizza_agent.py) searches the web for a Neapolitan pizza recipe and writes it to a file in the workspace: +The [pizza agent](examples/pizza_agent.py) searches the web for a Neapolitan pizza recipe and writes it to a file in the shared folder: ```python from deepagents import create_deep_agent from deepagents_docker import DockerSandbox backend = DockerSandbox( - workspace_dir="examples/data", + shared_dir="examples/data", allow_outbound_traffic=True, ) @@ -154,7 +145,7 @@ Contributions are welcome! Please feel free to open an issue or submit a pull re ## Security -Use this for trusted workloads and development, not as a hard multi-tenant boundary. Do not put secrets in the workspace. See [Deep Agents security](https://github.com/langchain-ai/deepagents?tab=security-ov-file). +Use this for trusted workloads and development, not as a hard multi-tenant boundary. Do not put secrets in the shared folder. See [Deep Agents security](https://github.com/langchain-ai/deepagents?tab=security-ov-file). 
## License diff --git a/examples/data/sales.csv b/examples/data/sales.csv new file mode 100644 index 0000000..614c690 --- /dev/null +++ b/examples/data/sales.csv @@ -0,0 +1,140 @@ +date,product,quantity,price +2026-01-01,pizza,10,12.99 +2026-01-01,pasta,14,9.99 +2026-01-01,salad,4,15.99 +2026-01-01,espresso,22,2.50 +2026-01-01,tiramisu,6,6.99 +2026-01-02,pizza,18,12.99 +2026-01-02,pasta,9,9.99 +2026-01-02,calzone,5,11.99 +2026-01-02,salad,7,15.99 +2026-01-02,house wine,8,7.50 +2026-01-03,pizza,24,12.99 +2026-01-03,pasta,12,9.99 +2026-01-03,margherita,15,10.99 +2026-01-03,diavola,11,13.99 +2026-01-03,espresso,31,2.50 +2026-01-04,pizza,8,12.99 +2026-01-04,pasta,16,9.99 +2026-01-04,salad,5,15.99 +2026-01-04,tiramisu,9,6.99 +2026-01-05,pizza,21,12.99 +2026-01-05,calzone,7,11.99 +2026-01-05,margherita,19,10.99 +2026-01-05,house wine,12,7.50 +2026-01-05,espresso,28,2.50 +2026-01-06,pizza,14,12.99 +2026-01-06,pasta,11,9.99 +2026-01-06,diavola,8,13.99 +2026-01-06,salad,6,15.99 +2026-01-07,pizza,27,12.99 +2026-01-07,margherita,22,10.99 +2026-01-07,pasta,13,9.99 +2026-01-07,tiramisu,14,6.99 +2026-01-07,house wine,15,7.50 +2026-01-08,pizza,12,12.99 +2026-01-08,calzone,9,11.99 +2026-01-08,salad,8,15.99 +2026-01-08,espresso,19,2.50 +2026-01-09,pizza,19,12.99 +2026-01-09,pasta,17,9.99 +2026-01-09,diavola,10,13.99 +2026-01-09,margherita,16,10.99 +2026-01-10,pizza,32,12.99 +2026-01-10,pasta,20,9.99 +2026-01-10,calzone,11,11.99 +2026-01-10,tiramisu,18,6.99 +2026-01-10,house wine,20,7.50 +2026-01-11,pizza,15,12.99 +2026-01-11,salad,10,15.99 +2026-01-11,espresso,25,2.50 +2026-01-12,pizza,23,12.99 +2026-01-12,margherita,14,10.99 +2026-01-12,pasta,15,9.99 +2026-01-12,diavola,12,13.99 +2026-01-13,pizza,11,12.99 +2026-01-13,calzone,6,11.99 +2026-01-13,salad,9,15.99 +2026-01-13,tiramisu,7,6.99 +2026-01-14,pizza,26,12.99 +2026-01-14,pasta,18,9.99 +2026-01-14,margherita,20,10.99 +2026-01-14,house wine,11,7.50 +2026-01-14,espresso,34,2.50 +2026-01-15,pizza,17,12.99 
+2026-01-15,diavola,9,13.99 +2026-01-15,salad,12,15.99 +2026-01-16,pizza,29,12.99 +2026-01-16,pasta,14,9.99 +2026-01-16,calzone,10,11.99 +2026-01-16,tiramisu,11,6.99 +2026-01-17,pizza,35,12.99 +2026-01-17,margherita,25,10.99 +2026-01-17,pasta,22,9.99 +2026-01-17,house wine,18,7.50 +2026-01-17,espresso,41,2.50 +2026-01-18,pizza,13,12.99 +2026-01-18,salad,7,15.99 +2026-01-18,diavola,6,13.99 +2026-01-19,pizza,20,12.99 +2026-01-19,pasta,10,9.99 +2026-01-19,calzone,8,11.99 +2026-01-19,tiramisu,10,6.99 +2026-01-20,pizza,22,12.99 +2026-01-20,margherita,17,10.99 +2026-01-20,salad,11,15.99 +2026-01-20,house wine,9,7.50 +2026-01-21,pizza,16,12.99 +2026-01-21,pasta,19,9.99 +2026-01-21,diavola,13,13.99 +2026-01-21,espresso,27,2.50 +2026-01-22,pizza,28,12.99 +2026-01-22,margherita,21,10.99 +2026-01-22,calzone,12,11.99 +2026-01-22,tiramisu,15,6.99 +2026-01-23,pizza,9,12.99 +2026-01-23,pasta,8,9.99 +2026-01-23,salad,5,15.99 +2026-01-24,pizza,31,12.99 +2026-01-24,pasta,24,9.99 +2026-01-24,margherita,18,10.99 +2026-01-24,house wine,16,7.50 +2026-01-24,espresso,36,2.50 +2026-01-25,pizza,25,12.99 +2026-01-25,diavola,14,13.99 +2026-01-25,calzone,9,11.99 +2026-01-25,tiramisu,12,6.99 +2026-01-26,pizza,14,12.99 +2026-01-26,pasta,12,9.99 +2026-01-26,salad,8,15.99 +2026-01-27,pizza,19,12.99 +2026-01-27,margherita,16,10.99 +2026-01-27,house wine,10,7.50 +2026-01-27,espresso,23,2.50 +2026-01-28,pizza,27,12.99 +2026-01-28,pasta,15,9.99 +2026-01-28,diavola,11,13.99 +2026-01-28,tiramisu,8,6.99 +2026-01-29,pizza,12,12.99 +2026-01-29,calzone,7,11.99 +2026-01-29,salad,9,15.99 +2026-01-30,pizza,30,12.99 +2026-01-30,margherita,23,10.99 +2026-01-30,pasta,21,9.99 +2026-01-30,house wine,14,7.50 +2026-01-30,espresso,38,2.50 +2026-01-31,pizza,33,12.99 +2026-01-31,pasta,18,9.99 +2026-01-31,diavola,15,13.99 +2026-01-31,tiramisu,16,6.99 +2026-02-01,pizza,11,12.99 +2026-02-01,salad,6,15.99 +2026-02-01,espresso,20,2.50 +2026-02-02,pizza,21,12.99 +2026-02-02,margherita,19,10.99 +2026-02-02,pasta,14,9.99 
+2026-02-02,calzone,8,11.99 +2026-02-03,pizza,24,12.99 +2026-02-03,diavola,12,13.99 +2026-02-03,house wine,13,7.50 +2026-02-03,tiramisu,9,6.99 diff --git a/examples/pizza_agent.py b/examples/pizza_agent.py index a77adbc..1535fce 100644 --- a/examples/pizza_agent.py +++ b/examples/pizza_agent.py @@ -1,12 +1,17 @@ +""" + +This example agent finds the best neapolitan pizza recipe on the web and writes it to a file. + +""" + from deepagents import create_deep_agent from deepagents_docker import DockerSandbox backend = DockerSandbox( - workspace_dir="examples/data", + shared_dir="examples/data", allow_outbound_traffic=True, ) - agent = create_deep_agent( model="openai:gpt-5.5", backend=backend, diff --git a/examples/sales_analyst.py b/examples/sales_analyst.py new file mode 100644 index 0000000..0303b96 --- /dev/null +++ b/examples/sales_analyst.py @@ -0,0 +1,38 @@ +""" + +In this example, we create an agent that analyzes sales data and writes a report to a markdown file. +Please note that the agent will create a python script to analyze the data, in case of missing packages, +it will automatically install any necessary python packages to make the analysis possible (like pandas, matplotlib, etc.). +Finally, it will run the script to generate the report as well as the images. +""" + +from deepagents import create_deep_agent + +from deepagents_docker import DockerSandbox + +backend = DockerSandbox( + shared_dir="examples/data", + allow_outbound_traffic=True, +) + + +agent = create_deep_agent( + model="openai:gpt-5.5", + backend=backend, + system_prompt="""You are a sales analyst assistant.""", +) + +if __name__ == "__main__": + for step in agent.stream( + { + "messages": """ + Analyze the "sales.csv" data and write a report (with charts) into a file called "sales_report.md". + Do not write any temporary files in our "shared" directory: write only the final report and put the images into a "img" directory. 
+ """ + }, + stream_mode="updates", + ): + for update in step.values(): + if update and (messages := update.get("messages")): + for message in messages: + message.pretty_print() diff --git a/src/deepagents_docker/__init__.py b/src/deepagents_docker/__init__.py index fac3d8e..31d72b3 100644 --- a/src/deepagents_docker/__init__.py +++ b/src/deepagents_docker/__init__.py @@ -1,5 +1,3 @@ -"""Docker-backed sandbox backend for DeepAgents.""" - from .backend import DockerSandbox from .errors import DockerError diff --git a/src/deepagents_docker/_docker.py b/src/deepagents_docker/_docker.py index a236443..9308898 100644 --- a/src/deepagents_docker/_docker.py +++ b/src/deepagents_docker/_docker.py @@ -1,7 +1,3 @@ -"""Low-level helpers for invoking the Docker CLI.""" - -from __future__ import annotations - import json import subprocess from collections.abc import Sequence @@ -55,17 +51,6 @@ def docker_available() -> bool: return result.returncode == 0 -def inspect_container_id(container_name: str) -> str: - """Return the container ID for a running container name.""" - result = run_docker( - ["inspect", "--format", "{{.Id}}", container_name], - ) - if result.returncode != 0: - msg = result.stderr.strip() or f"failed to inspect container {container_name!r}" - raise DockerError(msg) - return result.stdout.strip() - - def format_docker_error(result: DockerRunResult) -> str: """Combine stderr/stdout into a single error string.""" detail = (result.stderr or result.stdout).strip() diff --git a/src/deepagents_docker/backend.py b/src/deepagents_docker/backend.py index 15f09b4..7fb2bef 100644 --- a/src/deepagents_docker/backend.py +++ b/src/deepagents_docker/backend.py @@ -1,9 +1,6 @@ -"""DockerSandbox: isolated shell execution with host-backed workspace files.""" - from __future__ import annotations import atexit -import shlex import tempfile import uuid from pathlib import Path @@ -14,14 +11,13 @@ from ._docker import ( docker_available, format_docker_error, - inspect_container_id, 
run_docker, ) from .errors import DockerError DEFAULT_EXECUTE_TIMEOUT = 120 DEFAULT_IMAGE = "python:3.12-bookworm" -CONTAINER_WORKDIR = "/workspace" +CONTAINER_WORKDIR = "/shared" class DockerSandbox(FilesystemBackend, SandboxBackendProtocol): @@ -32,7 +28,7 @@ def __init__( *, image: str = DEFAULT_IMAGE, allow_outbound_traffic: bool = True, - workspace_dir: str | Path | None = None, + shared_dir: str | Path | None = None, timeout: int = DEFAULT_EXECUTE_TIMEOUT, max_output_bytes: int = 100_000, memory: str = "256m", @@ -41,12 +37,12 @@ def __init__( auto_remove: bool = True, extra_run_args: list[str] | None = None, ) -> None: - """Create a sandbox container and workspace directory. + """Create a sandbox container and shared directory. Args: image: Docker image for command execution (default: official ``python:3.12-bookworm``). allow_outbound_traffic: Allow/deny outbound network traffic (default: allow). - workspace_dir: Host directory for agent files. A temporary directory is + shared_dir: Host directory shared with the container. A temporary directory is created when omitted. timeout: Default command timeout in seconds. max_output_bytes: Maximum combined stdout/stderr captured per command. 
@@ -66,19 +62,15 @@ def __init__( msg = f"pids_limit must be positive, got {pids_limit}" raise ValueError(msg) - self._owns_workspace = workspace_dir is None - self._workspace = Path( - tempfile.mkdtemp(prefix="deepagents-docker-") - if workspace_dir is None - else workspace_dir, + self._owns_shared_dir = shared_dir is None + self._shared_dir = Path( + tempfile.mkdtemp(prefix="deepagents-docker-shared-") + if shared_dir is None + else shared_dir, ).resolve() - self._workspace.mkdir(parents=True, exist_ok=True) + self._shared_dir.mkdir(parents=True, exist_ok=True) - super().__init__( - root_dir=self._workspace, - virtual_mode=True, - max_file_size_mb=10, - ) + super().__init__(root_dir=self._shared_dir, virtual_mode=True) self._image = image self._default_timeout = timeout @@ -90,22 +82,22 @@ def __init__( self._auto_remove = auto_remove self._extra_run_args = list(extra_run_args or []) - self._container_name = f"deepagents-docker-{uuid.uuid4().hex[:12]}" - self._container_id: str | None = None + self._container_id: str = f"{uuid.uuid4().hex[:12]}" + self._container_name = f"deepagents-docker-{self._container_id}" self._closed = False self._start_container() atexit.register(self.close) @property - def workspace_dir(self) -> Path: - """Host path backing the agent workspace.""" - return self._workspace + def shared_dir(self) -> Path: + """Host path of the folder shared with the container.""" + return self._shared_dir @property def id(self) -> str: """Unique identifier for this sandbox instance.""" - return self._container_id or self._container_name + return self._container_id def _start_container(self) -> None: if not docker_available(): @@ -128,17 +120,8 @@ def _start_container(self) -> None: self._memory, "--pids-limit", str(self._pids_limit), - "--security-opt", - "no-new-privileges", - "--cap-drop", - "ALL", - "--read-only", - "--tmpfs", - "/tmp:rw,noexec,nosuid,size=512m", - "--tmpfs", - "/var/tmp:rw,noexec,nosuid,size=512m", "-v", - 
f"{self._workspace}:{CONTAINER_WORKDIR}:rw", + f"{self._shared_dir}:{CONTAINER_WORKDIR}:rw", "-w", CONTAINER_WORKDIR, *self._extra_run_args, @@ -151,12 +134,6 @@ def _start_container(self) -> None: msg = format_docker_error(result) raise DockerError(f"failed to start sandbox container: {msg}") - self._container_id = inspect_container_id(self._container_name) - - def _wrap_command(self, command: str) -> str: - """Run agent commands from the container workspace directory.""" - return f"cd {shlex.quote(CONTAINER_WORKDIR)} && {command}" - def execute( self, command: str, @@ -183,15 +160,12 @@ def execute( msg = f"timeout must be positive, got {effective_timeout}" raise ValueError(msg) - wrapped = self._wrap_command(command) docker_args = [ "exec", - "-w", - CONTAINER_WORKDIR, self._container_name, "sh", "-c", - wrapped, + command, ] try: @@ -262,10 +236,10 @@ def close(self) -> None: if self._auto_remove: run_docker(["rm", "-f", self._container_name], timeout=30) - if self._owns_workspace: + if self._owns_shared_dir: import shutil - shutil.rmtree(self._workspace, ignore_errors=True) + shutil.rmtree(self._shared_dir, ignore_errors=True) def __enter__(self) -> DockerSandbox: return self diff --git a/tests/test_docker_helpers.py b/tests/test_docker_helpers.py index dd9b8b8..fbeb433 100644 --- a/tests/test_docker_helpers.py +++ b/tests/test_docker_helpers.py @@ -1,7 +1,3 @@ -"""Unit tests for low-level Docker CLI helpers.""" - -from __future__ import annotations - import subprocess from unittest.mock import MagicMock, patch @@ -11,7 +7,6 @@ DockerRunResult, docker_available, format_docker_error, - inspect_container_id, run_docker, ) from deepagents_docker.errors import DockerError @@ -54,26 +49,6 @@ def test_docker_available_false_when_info_fails(run_docker: MagicMock) -> None: assert docker_available() is False -@patch("deepagents_docker._docker.run_docker") -def test_inspect_container_id_returns_stdout(run_docker: MagicMock) -> None: - run_docker.return_value = 
DockerRunResult(returncode=0, stdout="abc123\n", stderr="") - assert inspect_container_id("my-container") == "abc123" - - -@patch("deepagents_docker._docker.run_docker") -def test_inspect_container_id_raises_on_failure(run_docker: MagicMock) -> None: - run_docker.return_value = DockerRunResult(returncode=1, stdout="", stderr="no such object") - with pytest.raises(DockerError, match="no such object"): - inspect_container_id("missing") - - -@patch("deepagents_docker._docker.run_docker") -def test_inspect_container_id_fallback_message(run_docker: MagicMock) -> None: - run_docker.return_value = DockerRunResult(returncode=1, stdout="", stderr="") - with pytest.raises(DockerError, match="failed to inspect container 'missing'"): - inspect_container_id("missing") - - @patch("deepagents_docker._docker.subprocess.run") def test_run_docker_returns_captured_output(subprocess_run: MagicMock) -> None: completed = MagicMock() diff --git a/tests/test_docker_sandbox.py b/tests/test_docker_sandbox.py index 22c4ebe..8ff04c9 100644 --- a/tests/test_docker_sandbox.py +++ b/tests/test_docker_sandbox.py @@ -1,7 +1,3 @@ -"""Unit tests for DockerSandbox (Docker CLI mocked).""" - -from __future__ import annotations - from pathlib import Path from unittest.mock import MagicMock, patch @@ -9,7 +5,7 @@ from deepagents_docker import DockerError, DockerSandbox from deepagents_docker._docker import DockerRunResult -from deepagents_docker.backend import DEFAULT_IMAGE +from deepagents_docker.backend import CONTAINER_WORKDIR, DEFAULT_IMAGE def _docker_run_ok() -> DockerRunResult: @@ -34,16 +30,14 @@ def _run_docker(args: list[str], **kwargs: object) -> DockerRunResult: @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="abc123container") def test_default_image_is_python_bookworm( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, 
) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) try: run_args = run_docker.call_args_list[0][0][0] assert DEFAULT_IMAGE in run_args @@ -57,40 +51,38 @@ def test_default_image_is_python_bookworm( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="abc123container") -def test_start_container_applies_security_flags( - _inspect: MagicMock, +def test_start_container_mounts_shared_dir( _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path, image="test-image:local") + sandbox = DockerSandbox(shared_dir=tmp_path, image="test-image:local") try: run_args = run_docker.call_args_list[0][0][0] assert "run" in run_args assert "--network" in run_args and "bridge" in run_args - assert "--cap-drop" in run_args and "ALL" in run_args - assert "--read-only" in run_args - assert f"{tmp_path.resolve()}:/workspace:rw" in " ".join(run_args) - assert sandbox.id == "abc123container" + assert "--cap-drop" not in run_args + assert "--read-only" not in run_args + assert f"{tmp_path.resolve()}:{CONTAINER_WORKDIR}:rw" in run_args + workdir_index = run_args.index("-w") + assert run_args[workdir_index + 1] == CONTAINER_WORKDIR + assert len(sandbox.id) == 12 finally: sandbox.close() @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_start_container_disables_outbound_traffic( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path, allow_outbound_traffic=False) + sandbox 
= DockerSandbox(shared_dir=tmp_path, allow_outbound_traffic=False) try: run_args = run_docker.call_args_list[0][0][0] network_index = run_args.index("--network") @@ -101,9 +93,7 @@ def test_start_container_disables_outbound_traffic( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_start_container_applies_resource_limits_and_extra_args( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -111,7 +101,7 @@ def test_start_container_applies_resource_limits_and_extra_args( run_docker.return_value = _docker_run_ok() sandbox = DockerSandbox( - workspace_dir=tmp_path, + shared_dir=tmp_path, memory="1g", cpus=2.5, pids_limit=256, @@ -127,36 +117,17 @@ def test_start_container_applies_resource_limits_and_extra_args( sandbox.close() -@patch("deepagents_docker.backend.inspect_container_id") @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) def test_raises_when_container_start_fails( _available: MagicMock, run_docker: MagicMock, - inspect: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = DockerRunResult(returncode=1, stdout="", stderr="image not found") with pytest.raises(DockerError, match="failed to start sandbox container: image not found"): - DockerSandbox(workspace_dir=tmp_path) - - inspect.assert_not_called() - - -@patch("deepagents_docker.backend.run_docker") -@patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", side_effect=DockerError("inspect failed")) -def test_raises_when_container_inspect_fails( - _inspect: MagicMock, - _available: MagicMock, - run_docker: MagicMock, - tmp_path: Path, -) -> None: - run_docker.return_value = _docker_run_ok() - - with pytest.raises(DockerError, match="inspect failed"): - 
DockerSandbox(workspace_dir=tmp_path) + DockerSandbox(shared_dir=tmp_path) @pytest.mark.parametrize( @@ -189,9 +160,7 @@ def test_docker_error_is_public_runtime_error() -> None: @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") -def test_execute_wraps_command_and_returns_output( - _inspect: MagicMock, +def test_execute_runs_command_in_container( _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -200,26 +169,22 @@ def test_execute_wraps_command_and_returns_output( exec=DockerRunResult(returncode=0, stdout="hello\n", stderr=""), ) - sandbox = DockerSandbox(workspace_dir=tmp_path, image="test-image:local") + sandbox = DockerSandbox(shared_dir=tmp_path, image="test-image:local") try: result = sandbox.execute("echo hello") assert result.exit_code == 0 assert "hello" in result.output exec_args = run_docker.call_args_list[1][0][0] - assert exec_args[:4] == ["exec", "-w", "/workspace", sandbox._container_name] - shell_cmd = exec_args[-1] - assert shell_cmd.startswith("cd /workspace && ") - assert "echo hello" in shell_cmd + assert exec_args[:2] == ["exec", sandbox._container_name] + assert exec_args[-1] == "echo hello" finally: sandbox.close() @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_formats_stderr_and_nonzero_exit( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -228,7 +193,7 @@ def test_execute_formats_stderr_and_nonzero_exit( exec=DockerRunResult(returncode=2, stdout="", stderr="something broke\n"), ) - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) try: result = sandbox.execute("false") assert result.exit_code == 2 @@ -240,9 +205,7 @@ def 
test_execute_formats_stderr_and_nonzero_exit( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_reports_no_output( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -251,7 +214,7 @@ def test_execute_reports_no_output( exec=DockerRunResult(returncode=0, stdout="", stderr=""), ) - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) try: result = sandbox.execute("true") assert result.output == "" @@ -261,9 +224,7 @@ def test_execute_reports_no_output( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_truncates_large_output( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -272,7 +233,7 @@ def test_execute_truncates_large_output( exec=DockerRunResult(returncode=0, stdout="x" * 200, stderr=""), ) - sandbox = DockerSandbox(workspace_dir=tmp_path, max_output_bytes=50) + sandbox = DockerSandbox(shared_dir=tmp_path, max_output_bytes=50) try: result = sandbox.execute("printf x") assert result.truncated is True @@ -284,16 +245,14 @@ def test_execute_truncates_large_output( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_rejects_empty_command( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) try: result = sandbox.execute("") assert result.exit_code == 1 @@ -304,16 +263,14 @@ def 
test_execute_rejects_empty_command( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_after_close_returns_error( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) sandbox.close() result = sandbox.execute("echo hello") assert result.exit_code == 1 @@ -322,9 +279,7 @@ def test_execute_after_close_returns_error( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_timeout_with_custom_message( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -333,7 +288,7 @@ def test_execute_timeout_with_custom_message( error=DockerError("docker command timed out after 1 seconds"), ) - sandbox = DockerSandbox(workspace_dir=tmp_path, timeout=1) + sandbox = DockerSandbox(shared_dir=tmp_path, timeout=1) try: result = sandbox.execute("sleep 10", timeout=1) assert result.exit_code == 124 @@ -344,9 +299,7 @@ def test_execute_timeout_with_custom_message( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_timeout_with_default_message( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -355,7 +308,7 @@ def test_execute_timeout_with_default_message( error=DockerError("docker command timed out after 120 seconds"), ) - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) try: result = sandbox.execute("sleep 10") assert 
result.exit_code == 124 @@ -366,9 +319,7 @@ def test_execute_timeout_with_default_message( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_when_docker_binary_missing( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, @@ -377,7 +328,7 @@ def test_execute_when_docker_binary_missing( error=DockerError("docker executable not found on PATH"), ) - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) try: result = sandbox.execute("echo hello") assert result.exit_code == 1 @@ -388,16 +339,14 @@ def test_execute_when_docker_binary_missing( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_execute_rejects_non_positive_timeout( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) try: with pytest.raises(ValueError, match="timeout must be positive"): sandbox.execute("echo hello", timeout=0) @@ -407,16 +356,14 @@ def test_execute_rejects_non_positive_timeout( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_write_and_read_via_virtual_paths( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path, image="test-image:local") + sandbox = DockerSandbox(shared_dir=tmp_path, image="test-image:local") try: write_result = 
sandbox.write("/notes.txt", "alpha\n") assert write_result.error is None @@ -432,16 +379,14 @@ def test_write_and_read_via_virtual_paths( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_close_stops_and_removes_container( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) container_name = sandbox._container_name sandbox.close() @@ -453,16 +398,14 @@ def test_close_stops_and_removes_container( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_close_is_idempotent( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) sandbox.close() sandbox.close() @@ -471,16 +414,14 @@ def test_close_is_idempotent( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_close_skips_remove_when_auto_remove_disabled( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path, auto_remove=False) + sandbox = DockerSandbox(shared_dir=tmp_path, auto_remove=False) sandbox.close() assert len(run_docker.call_args_list) == 2 @@ -490,37 +431,33 @@ def test_close_skips_remove_when_auto_remove_disabled( @patch("deepagents_docker.backend.tempfile.mkdtemp") 
@patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") -def test_close_removes_owned_workspace( - _inspect: MagicMock, +def test_close_removes_owned_shared_dir( _available: MagicMock, run_docker: MagicMock, mkdtemp: MagicMock, tmp_path: Path, ) -> None: - workspace = tmp_path / "owned-workspace" - workspace.mkdir() - mkdtemp.return_value = str(workspace) + shared = tmp_path / "owned-shared" + shared.mkdir() + mkdtemp.return_value = str(shared) run_docker.return_value = _docker_run_ok() sandbox = DockerSandbox() sandbox.close() - assert not workspace.exists() + assert not shared.exists() @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") -def test_close_preserves_user_workspace( - _inspect: MagicMock, +def test_close_preserves_user_shared_dir( _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox = DockerSandbox(shared_dir=tmp_path) sandbox.close() assert tmp_path.exists() @@ -528,16 +465,14 @@ def test_close_preserves_user_workspace( @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) -@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") def test_context_manager_closes_sandbox( - _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - with DockerSandbox(workspace_dir=tmp_path) as sandbox: - assert sandbox.id == "cid" + with DockerSandbox(shared_dir=tmp_path) as sandbox: + assert len(sandbox.id) == 12 assert len(run_docker.call_args_list) == 3