From e09b4cd57bfa04a6405dad04e8d60c6cb719c3ab Mon Sep 17 00:00:00 2001 From: Andrea Bruno Date: Wed, 3 Jun 2026 16:34:39 +0200 Subject: [PATCH 1/2] Fix: refactor + tests + ci --- .github/workflows/ci.yml | 42 +++ pyproject.toml | 16 ++ src/deepagents_docker/__init__.py | 7 +- src/deepagents_docker/_docker.py | 4 +- src/deepagents_docker/backend.py | 57 ++-- src/deepagents_docker/errors.py | 2 + tests/test_docker_helpers.py | 104 +++++++ tests/test_docker_sandbox.py | 460 +++++++++++++++++++++++++++--- uv.lock | 29 +- 9 files changed, 644 insertions(+), 77 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 src/deepagents_docker/errors.py create mode 100644 tests/test_docker_helpers.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0061e7c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,42 @@ +name: CI + +on: + pull_request: + push: + branches: [master, main] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: astral-sh/setup-uv@v5 + + - name: Install dependencies + run: uv sync + + - name: Ruff check + run: uv run ruff check . + + - name: Ruff format + run: uv run ruff format --check . + + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + + - uses: astral-sh/setup-uv@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: uv sync + + - name: Run tests + run: uv run pytest -v diff --git a/pyproject.toml b/pyproject.toml index 6660432..98e9784 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Scientific/Engineering :: Artificial Intelligence", ] dependencies = [ "deepagents>=0.6.7", @@ -28,6 +29,7 @@ dependencies = [ dev = [ "build>=1.2.0", "pytest>=9.0.0", + "ruff>=0.9.0", "twine>=6.0.0", ] @@ -41,4 +43,18 @@ packages = ["src/deepagents_docker"] [tool.pytest.ini_options] testpaths = ["tests"] +[tool.ruff] +target-version = "py312" +line-length = 100 +[tool.ruff.lint] +select = [ + # isort + "I", + + # Pyflakes + "F", + + # Pyupgrade + "UP", +] diff --git a/src/deepagents_docker/__init__.py b/src/deepagents_docker/__init__.py index 5bc646a..fac3d8e 100644 --- a/src/deepagents_docker/__init__.py +++ b/src/deepagents_docker/__init__.py @@ -1,7 +1,6 @@ """Docker-backed sandbox backend for DeepAgents.""" -from deepagents_docker.backend import ( - DockerSandbox, -) +from .backend import DockerSandbox +from .errors import DockerError -__all__ = ["DockerSandbox"] +__all__ = ["DockerError", "DockerSandbox"] diff --git a/src/deepagents_docker/_docker.py b/src/deepagents_docker/_docker.py index e76ccd5..a236443 100644 --- a/src/deepagents_docker/_docker.py +++ b/src/deepagents_docker/_docker.py @@ -7,9 +7,7 @@ from collections.abc import Sequence from dataclasses import dataclass - -class DockerError(RuntimeError): - """Raised when a Docker CLI invocation fails.""" +from .errors import DockerError @dataclass(frozen=True) diff --git a/src/deepagents_docker/backend.py b/src/deepagents_docker/backend.py index 2d95162..72a83ab 100644 --- a/src/deepagents_docker/backend.py +++ b/src/deepagents_docker/backend.py @@ -4,7 +4,6 @@ import atexit import shlex -import subprocess import tempfile import uuid from pathlib import Path @@ -12,13 +11,13 @@ from deepagents.backends.filesystem import FilesystemBackend from deepagents.backends.protocol import ExecuteResponse, SandboxBackendProtocol -from deepagents_docker._docker import ( - DockerError, +from ._docker import ( docker_available, format_docker_error, inspect_container_id, run_docker, ) +from .errors import DockerError DEFAULT_EXECUTE_TIMEOUT = 120 DEFAULT_IMAGE = "python:3.12-bookworm" @@ -57,6 +56,7 @@ def __init__( Args: image: Docker image for command execution (default: official ``python:3.12-bookworm``). + allow_outbound_traffic: Allow/deny outbound network traffic (default: allow). workspace_dir: Host directory for agent files. A temporary directory is created when omitted. timeout: Default command timeout in seconds. @@ -64,7 +64,6 @@ def __init__( memory: Docker memory limit (for example ``"512m"``). cpus: Docker CPU limit. pids_limit: Maximum number of PIDs inside the container. - outbound_traffic: Allow/deny outbound network traffic (default: allow). auto_remove: Remove the container on ``close()``. extra_run_args: Additional ``docker run`` flags appended before the image. """ @@ -205,34 +204,32 @@ def execute( ] try: - completed = subprocess.run( # noqa: S602 - ["docker", *docker_args], - check=False, - capture_output=True, - text=True, - timeout=effective_timeout, - ) - except subprocess.TimeoutExpired: - if timeout is not None: - msg = ( - f"Error: Command timed out after {effective_timeout} seconds " - "(custom timeout). The command may be stuck or require more time." - ) - else: - msg = ( - f"Error: Command timed out after {effective_timeout} seconds. " - "For long-running commands, re-run using the timeout parameter." + completed = run_docker(docker_args, timeout=effective_timeout) + except DockerError as exc: + detail = str(exc) + if "timed out" in detail: + if timeout is not None: + msg = ( + f"Error: Command timed out after {effective_timeout} seconds " + "(custom timeout). The command may be stuck or require more time." + ) + else: + msg = ( + f"Error: Command timed out after {effective_timeout} seconds. " + "For long-running commands, re-run using the timeout parameter." + ) + return ExecuteResponse(output=msg, exit_code=124, truncated=False) + if "not found on PATH" in detail: + return ExecuteResponse( + output=( + "Error executing command (FileNotFoundError): " + "docker executable not found on PATH" + ), + exit_code=1, + truncated=False, ) - return ExecuteResponse(output=msg, exit_code=124, truncated=False) - except FileNotFoundError: - return ExecuteResponse( - output="Error executing command (FileNotFoundError): docker executable not found on PATH", - exit_code=1, - truncated=False, - ) - except Exception as exc: # noqa: BLE001 return ExecuteResponse( - output=f"Error executing command ({type(exc).__name__}): {exc}", + output=f"Error executing command (DockerError): {exc}", exit_code=1, truncated=False, ) diff --git a/src/deepagents_docker/errors.py b/src/deepagents_docker/errors.py new file mode 100644 index 0000000..6a3fa77 --- /dev/null +++ b/src/deepagents_docker/errors.py @@ -0,0 +1,2 @@ +class DockerError(RuntimeError): + """Raised when a Docker CLI invocation fails.""" diff --git a/tests/test_docker_helpers.py b/tests/test_docker_helpers.py new file mode 100644 index 0000000..dd9b8b8 --- /dev/null +++ b/tests/test_docker_helpers.py @@ -0,0 +1,104 @@ +"""Unit tests for low-level Docker CLI helpers.""" + +from __future__ import annotations + +import subprocess +from unittest.mock import MagicMock, patch + +import pytest + +from deepagents_docker._docker import ( + DockerRunResult, + docker_available, + format_docker_error, + inspect_container_id, + run_docker, +) +from deepagents_docker.errors import DockerError + + +def test_format_docker_error_returns_plain_stderr() -> None: + result = DockerRunResult(returncode=1, stdout="", stderr="image not found") + assert format_docker_error(result) == "image not found" + + +def test_format_docker_error_extracts_json_message() -> None: + result = DockerRunResult( + returncode=1, + stdout="", + stderr='{"message": "Conflict. The container name is already in use."}', + ) + assert format_docker_error(result) == "Conflict. The container name is already in use." + + +def test_format_docker_error_returns_json_without_message_field() -> None: + payload = '{"errorDetail":{"code":404}}' + result = DockerRunResult(returncode=1, stdout="", stderr=payload) + assert format_docker_error(result) == payload + + +def test_format_docker_error_falls_back_to_exit_code() -> None: + result = DockerRunResult(returncode=7, stdout="", stderr="") + assert format_docker_error(result) == "exit code 7" + + +@patch("deepagents_docker._docker.run_docker") +def test_docker_available_true_when_info_succeeds(run_docker: MagicMock) -> None: + run_docker.return_value = DockerRunResult(returncode=0, stdout="25.0.0\n", stderr="") + assert docker_available() is True + + +@patch("deepagents_docker._docker.run_docker") +def test_docker_available_false_when_info_fails(run_docker: MagicMock) -> None: + run_docker.return_value = DockerRunResult(returncode=1, stdout="", stderr="daemon down") + assert docker_available() is False + + +@patch("deepagents_docker._docker.run_docker") +def test_inspect_container_id_returns_stdout(run_docker: MagicMock) -> None: + run_docker.return_value = DockerRunResult(returncode=0, stdout="abc123\n", stderr="") + assert inspect_container_id("my-container") == "abc123" + + +@patch("deepagents_docker._docker.run_docker") +def test_inspect_container_id_raises_on_failure(run_docker: MagicMock) -> None: + run_docker.return_value = DockerRunResult(returncode=1, stdout="", stderr="no such object") + with pytest.raises(DockerError, match="no such object"): + inspect_container_id("missing") + + +@patch("deepagents_docker._docker.run_docker") +def test_inspect_container_id_fallback_message(run_docker: MagicMock) -> None: + run_docker.return_value = DockerRunResult(returncode=1, stdout="", stderr="") + with pytest.raises(DockerError, match="failed to inspect container 'missing'"): + inspect_container_id("missing") + + +@patch("deepagents_docker._docker.subprocess.run") +def test_run_docker_returns_captured_output(subprocess_run: MagicMock) -> None: + completed = MagicMock() + completed.returncode = 0 + completed.stdout = "ok\n" + completed.stderr = "" + subprocess_run.return_value = completed + + result = run_docker(["info"]) + + assert result == DockerRunResult(returncode=0, stdout="ok\n", stderr="") + subprocess_run.assert_called_once() + + +@patch("deepagents_docker._docker.subprocess.run") +def test_run_docker_raises_on_timeout(subprocess_run: MagicMock) -> None: + subprocess_run.side_effect = subprocess.TimeoutExpired(cmd="docker", timeout=5) + + with pytest.raises(DockerError, match="timed out after 5 seconds"): + run_docker(["exec", "cid", "true"], timeout=5) + + +@patch("deepagents_docker._docker.subprocess.run") +def test_run_docker_raises_when_docker_missing(subprocess_run: MagicMock) -> None: + subprocess_run.side_effect = FileNotFoundError + + with pytest.raises(DockerError, match="not found on PATH"): + run_docker(["info"]) diff --git a/tests/test_docker_sandbox.py b/tests/test_docker_sandbox.py index 67d9977..45fccaa 100644 --- a/tests/test_docker_sandbox.py +++ b/tests/test_docker_sandbox.py @@ -2,27 +2,34 @@ from __future__ import annotations -import subprocess from pathlib import Path from unittest.mock import MagicMock, patch import pytest -from deepagents_docker import DockerSandbox -from deepagents_docker._docker import DockerError, DockerRunResult +from deepagents_docker import DockerError, DockerSandbox +from deepagents_docker._docker import DockerRunResult from deepagents_docker.backend import DEFAULT_IMAGE -def _docker_info_ok() -> DockerRunResult: - return DockerRunResult(returncode=0, stdout="25.0.0\n", stderr="") - - def _docker_run_ok() -> DockerRunResult: return DockerRunResult(returncode=0, stdout="", stderr="") -def _docker_inspect_ok() -> DockerRunResult: - return DockerRunResult(returncode=0, stdout="abc123container\n", stderr="") +def _make_run_docker_side_effect(**exec_config: object): + """Build a side_effect for mocked run_docker (handles run/exec/stop/rm).""" + + def _run_docker(args: list[str], **kwargs: object) -> DockerRunResult: + if args[0] == "exec": + if "error" in exec_config: + raise exec_config["error"] + return exec_config.get( + "exec", + DockerRunResult(returncode=0, stdout="", stderr=""), + ) + return _docker_run_ok() + + return _run_docker @patch("deepagents_docker.backend.run_docker") @@ -40,9 +47,7 @@ def test_default_image_is_python_bookworm( try: run_args = run_docker.call_args_list[0][0][0] assert DEFAULT_IMAGE in run_args - assert run_args[ - run_args.index(DEFAULT_IMAGE) + 1 : run_args.index(DEFAULT_IMAGE) + 3 - ] == [ + assert run_args[run_args.index(DEFAULT_IMAGE) + 1 : run_args.index(DEFAULT_IMAGE) + 3] == [ "sleep", "infinity", ] @@ -65,7 +70,7 @@ def test_start_container_applies_security_flags( try: run_args = run_docker.call_args_list[0][0][0] assert "run" in run_args - assert "--network" in run_args and "none" in run_args + assert "--network" in run_args and "bridge" in run_args assert "--cap-drop" in run_args and "ALL" in run_args assert "--read-only" in run_args assert f"{tmp_path.resolve()}:/workspace:rw" in " ".join(run_args) @@ -74,23 +79,126 @@ def test_start_container_applies_security_flags( sandbox.close() -@patch("deepagents_docker.backend.subprocess.run") @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) @patch("deepagents_docker.backend.inspect_container_id", return_value="cid") -def test_execute_wraps_command_and_returns_output( +def test_start_container_disables_outbound_traffic( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox(workspace_dir=tmp_path, allow_outbound_traffic=False) + try: + run_args = run_docker.call_args_list[0][0][0] + network_index = run_args.index("--network") + assert run_args[network_index + 1] == "none" + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_start_container_applies_resource_limits_and_extra_args( _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, - subprocess_run: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - completed = MagicMock() - completed.stdout = "hello\n" - completed.stderr = "" - completed.returncode = 0 - subprocess_run.return_value = completed + + sandbox = DockerSandbox( + workspace_dir=tmp_path, + memory="1g", + cpus=2.5, + pids_limit=256, + extra_run_args=["--env", "FOO=bar"], + ) + try: + run_args = run_docker.call_args_list[0][0][0] + assert run_args[run_args.index("--memory") + 1] == "1g" + assert run_args[run_args.index("--cpus") + 1] == "2.5" + assert run_args[run_args.index("--pids-limit") + 1] == "256" + assert run_args[run_args.index("--env") + 1] == "FOO=bar" + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.inspect_container_id") +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +def test_raises_when_container_start_fails( + _available: MagicMock, + run_docker: MagicMock, + inspect: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = DockerRunResult(returncode=1, stdout="", stderr="image not found") + + with pytest.raises(DockerError, match="failed to start sandbox container: image not found"): + DockerSandbox(workspace_dir=tmp_path) + + inspect.assert_not_called() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", side_effect=DockerError("inspect failed")) +def test_raises_when_container_inspect_fails( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + with pytest.raises(DockerError, match="inspect failed"): + DockerSandbox(workspace_dir=tmp_path) + + +@pytest.mark.parametrize( + ("kwargs", "match"), + [ + ({"timeout": 0}, "timeout must be positive"), + ({"cpus": 0}, "cpus must be positive"), + ({"pids_limit": -1}, "pids_limit must be positive"), + ], +) +@patch("deepagents_docker.backend.docker_available", return_value=True) +def test_constructor_rejects_invalid_limits( + _available: MagicMock, + kwargs: dict[str, int], + match: str, +) -> None: + with pytest.raises(ValueError, match=match): + DockerSandbox(**kwargs) + + +@patch("deepagents_docker.backend.docker_available", return_value=False) +def test_raises_when_docker_unavailable(_available: MagicMock) -> None: + with pytest.raises(DockerError, match="Docker is not available"): + DockerSandbox(image="missing:latest") + + +def test_docker_error_is_public_runtime_error() -> None: + assert issubclass(DockerError, RuntimeError) + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_wraps_command_and_returns_output( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.side_effect = _make_run_docker_side_effect( + exec=DockerRunResult(returncode=0, stdout="hello\n", stderr=""), + ) sandbox = DockerSandbox(workspace_dir=tmp_path, image="test-image:local") try: @@ -98,15 +206,205 @@ def test_execute_wraps_command_and_returns_output( assert result.exit_code == 0 assert "hello" in result.output - docker_cmd = subprocess_run.call_args[0][0] - assert docker_cmd[:2] == ["docker", "exec"] - shell_cmd = docker_cmd[-1] + exec_args = run_docker.call_args_list[1][0][0] + assert exec_args[0] == "exec" + shell_cmd = exec_args[-1] assert shell_cmd.startswith("cd /workspace && ") assert "echo hello" in shell_cmd finally: sandbox.close() +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_formats_stderr_and_nonzero_exit( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.side_effect = _make_run_docker_side_effect( + exec=DockerRunResult(returncode=2, stdout="", stderr="something broke\n"), + ) + + sandbox = DockerSandbox(workspace_dir=tmp_path) + try: + result = sandbox.execute("false") + assert result.exit_code == 2 + assert "[stderr] something broke" in result.output + assert "Exit code: 2" in result.output + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_reports_no_output( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.side_effect = _make_run_docker_side_effect( + exec=DockerRunResult(returncode=0, stdout="", stderr=""), + ) + + sandbox = DockerSandbox(workspace_dir=tmp_path) + try: + result = sandbox.execute("true") + assert result.output == "" + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_truncates_large_output( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.side_effect = _make_run_docker_side_effect( + exec=DockerRunResult(returncode=0, stdout="x" * 200, stderr=""), + ) + + sandbox = DockerSandbox(workspace_dir=tmp_path, max_output_bytes=50) + try: + result = sandbox.execute("printf x") + assert result.truncated is True + assert len(result.output) <= 50 + len("\n\n... Output truncated at 50 bytes.") + assert "Output truncated at 50 bytes" in result.output + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_rejects_empty_command( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox(workspace_dir=tmp_path) + try: + result = sandbox.execute("") + assert result.exit_code == 1 + assert "non-empty string" in result.output + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_after_close_returns_error( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox.close() + result = sandbox.execute("echo hello") + assert result.exit_code == 1 + assert "closed" in result.output.lower() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_timeout_with_custom_message( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.side_effect = _make_run_docker_side_effect( + error=DockerError("docker command timed out after 1 seconds"), + ) + + sandbox = DockerSandbox(workspace_dir=tmp_path, timeout=1) + try: + result = sandbox.execute("sleep 10", timeout=1) + assert result.exit_code == 124 + assert "custom timeout" in result.output.lower() + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_timeout_with_default_message( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.side_effect = _make_run_docker_side_effect( + error=DockerError("docker command timed out after 120 seconds"), + ) + + sandbox = DockerSandbox(workspace_dir=tmp_path) + try: + result = sandbox.execute("sleep 10") + assert result.exit_code == 124 + assert "timeout parameter" in result.output.lower() + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_when_docker_binary_missing( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.side_effect = _make_run_docker_side_effect( + error=DockerError("docker executable not found on PATH"), + ) + + sandbox = DockerSandbox(workspace_dir=tmp_path) + try: + result = sandbox.execute("echo hello") + assert result.exit_code == 1 + assert "not found on PATH" in result.output + finally: + sandbox.close() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_execute_rejects_non_positive_timeout( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox(workspace_dir=tmp_path) + try: + with pytest.raises(ValueError, match="timeout must be positive"): + sandbox.execute("echo hello", timeout=0) + finally: + sandbox.close() + + @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) @patch("deepagents_docker.backend.inspect_container_id", return_value="cid") @@ -132,30 +430,114 @@ def test_write_and_read_via_virtual_paths( sandbox.close() -@patch("deepagents_docker.backend.docker_available", return_value=False) -def test_raises_when_docker_unavailable(_available: MagicMock) -> None: - with pytest.raises(DockerError, match="Docker is not available"): - DockerSandbox(image="missing:latest") +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_close_stops_and_removes_container( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox(workspace_dir=tmp_path) + container_name = sandbox._container_name + sandbox.close() + + stop_call = run_docker.call_args_list[1] + rm_call = run_docker.call_args_list[2] + assert stop_call[0][0] == ["stop", "-t", "2", container_name] + assert rm_call[0][0] == ["rm", "-f", container_name] -@patch("deepagents_docker.backend.subprocess.run") @patch("deepagents_docker.backend.run_docker") @patch("deepagents_docker.backend.docker_available", return_value=True) @patch("deepagents_docker.backend.inspect_container_id", return_value="cid") -def test_execute_timeout( +def test_close_is_idempotent( _inspect: MagicMock, _available: MagicMock, run_docker: MagicMock, - subprocess_run: MagicMock, tmp_path: Path, ) -> None: run_docker.return_value = _docker_run_ok() - subprocess_run.side_effect = subprocess.TimeoutExpired(cmd="docker", timeout=1) - sandbox = DockerSandbox(workspace_dir=tmp_path, image="test-image:local", timeout=1) - try: - result = sandbox.execute("sleep 10", timeout=1) - assert result.exit_code == 124 - assert "timed out" in result.output.lower() - finally: - sandbox.close() + sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox.close() + sandbox.close() + + assert len(run_docker.call_args_list) == 3 + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_close_skips_remove_when_auto_remove_disabled( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox(workspace_dir=tmp_path, auto_remove=False) + sandbox.close() + + assert len(run_docker.call_args_list) == 2 + assert run_docker.call_args_list[1][0][0][0] == "stop" + + +@patch("deepagents_docker.backend.tempfile.mkdtemp") +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_close_removes_owned_workspace( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + mkdtemp: MagicMock, + tmp_path: Path, +) -> None: + workspace = tmp_path / "owned-workspace" + workspace.mkdir() + mkdtemp.return_value = str(workspace) + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox() + sandbox.close() + + assert not workspace.exists() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_close_preserves_user_workspace( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + sandbox = DockerSandbox(workspace_dir=tmp_path) + sandbox.close() + + assert tmp_path.exists() + + +@patch("deepagents_docker.backend.run_docker") +@patch("deepagents_docker.backend.docker_available", return_value=True) +@patch("deepagents_docker.backend.inspect_container_id", return_value="cid") +def test_context_manager_closes_sandbox( + _inspect: MagicMock, + _available: MagicMock, + run_docker: MagicMock, + tmp_path: Path, +) -> None: + run_docker.return_value = _docker_run_ok() + + with DockerSandbox(workspace_dir=tmp_path) as sandbox: + assert sandbox.id == "cid" + + assert len(run_docker.call_args_list) == 3 diff --git a/uv.lock b/uv.lock index 741ab5d..cd1d8be 100644 --- a/uv.lock +++ b/uv.lock @@ -286,7 +286,7 @@ wheels = [ [[package]] name = "deepagents-docker" -version = "0.0.1" +version = "0.0.2" source = { editable = "." } dependencies = [ { name = "deepagents" }, @@ -296,6 +296,7 @@ dependencies = [ dev = [ { name = "build" }, { name = "pytest" }, + { name = "ruff" }, { name = "twine" }, ] @@ -306,6 +307,7 @@ requires-dist = [{ name = "deepagents", specifier = ">=0.6.7" }] dev = [ { name = "build", specifier = ">=1.2.0" }, { name = "pytest", specifier = ">=9.0.0" }, + { name = "ruff", specifier = ">=0.9.0" }, { name = "twine", specifier = ">=6.0.0" }, ] @@ -1204,6 +1206,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ] +[[package]] +name = "ruff" +version = "0.15.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/84/6f/a76f7d96e5c962f5b69cee865e49c15c1116897c01990faa8a57edb62e7f/ruff-0.15.15.tar.gz", hash = "sha256:b8dff018130b46d8e5bf0f926ef6b60cf871d6d5ae45fc9334e09632daa741d6", size = 4706985, upload-time = "2026-05-28T14:16:57.784Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/9d/3a45c05b8ab04b4705989de70a79008e27c8003296a0feaee9edc18dd7e9/ruff-0.15.15-py3-none-linux_armv6l.whl", hash = "sha256:cf93e5388f412e1b108b1f8b34a6e036b70fe8aff89393befad96fe48670311b", size = 10710652, upload-time = "2026-05-28T14:16:06.701Z" }, + { url = "https://files.pythonhosted.org/packages/05/66/da974431624bf3b49f6ee1f9543c02d929ff1cba78b0d5a79c38cf21f744/ruff-0.15.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ac5a646d1f6a7dadd5d50842dae2c1f9862ac887ef5d1b1375e02def791fde6e", size = 11096615, upload-time = "2026-05-28T14:16:23.313Z" }, + { url = "https://files.pythonhosted.org/packages/8c/09/7443452e5d290230a712103f2fdceeef7184f3ec99a2bd01c8be78aaceb5/ruff-0.15.15-py3-none-macosx_11_0_arm64.whl", hash = "sha256:77d955a431430c66f72dd94e379ad38a16daea3d25094872ac4edf9e797be530", size = 10436683, upload-time = "2026-05-28T14:16:40.974Z" }, + { url = "https://files.pythonhosted.org/packages/53/01/d330c26a57fa4f3943a14424904027428315b700fe4d14a84bb123a649e5/ruff-0.15.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7614ee79c69788cf6cedd568069ade9cecc22a1ad20494efe8d0c9ebb4b622d4", size = 10769064, upload-time = "2026-05-28T14:16:28.905Z" }, + { url = "https://files.pythonhosted.org/packages/1d/85/cc8770f8bdff541b1da8392d1634141fe4a0e3f4ee596605959b7906c27f/ruff-0.15.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cdb1679e06a1f6b47bc384714ae96f6e2fb65ca441eb78c43d2ca554176ce1f", size = 10511987, upload-time = "2026-05-28T14:16:43.732Z" }, + { url = "https://files.pythonhosted.org/packages/7c/29/8c190c1472b63013583ba391f3342036e02010544c1270455ed8e519bdf3/ruff-0.15.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2728b93d7b23a603ea2c0ac6eb73d760bd38ec9de35f35fb41e18f7a3fee7622", size = 11275100, upload-time = "2026-05-28T14:16:55.244Z" }, + { url = "https://files.pythonhosted.org/packages/9f/6b/7e145ce2cc8e63d6834eca03d83a0e18d121def5c69f91b4cf4011ed4879/ruff-0.15.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be582fcc0db438902c7792b08d6ddf6c9b9e21addaa10092c2c741cfb09e5a45", size = 12176903, upload-time = "2026-05-28T14:16:14.368Z" }, + { url = "https://files.pythonhosted.org/packages/80/a3/d5974637f68e451f7fadf015cf3101d1cd7d8ba5027cffe0b9e3826ebe6b/ruff-0.15.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7aa77465b8ecaf1a27bea098d696f7fed5e1eccbd10b321b682d6de586ae5627", size = 11404550, upload-time = "2026-05-28T14:16:20.138Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1c/e6e5e568f22be4fb05d6244234aba384c06b451252453b821e1a529263cf/ruff-0.15.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48decfa11d740de4889de623be1463308346312f2409a56e24aa280c86162dc4", size = 11382027, upload-time = "2026-05-28T14:16:46.615Z" }, + { url = "https://files.pythonhosted.org/packages/1d/01/170921b49fcd2e8858825593f91cf7146c3e40a5c3e6df763e4bb0484dde/ruff-0.15.15-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a5015088452ca0081387063649ec67f06d3d1d6b8b936a1f836b5e9657ecd48c", size = 11366041, upload-time = "2026-05-28T14:16:26.247Z" }, + { url = "https://files.pythonhosted.org/packages/87/54/a7bad711d7de93254e15e06a4c375b89a03d18de45d3e5dcc86a4472fb1a/ruff-0.15.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f5294aab6356c81600fcdea3a62bb1b924dfd5e91767c12318d3f68f86af57cd", size = 10741795, upload-time = "2026-05-28T14:16:17.11Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/38c075963668f8b41c6914ee0f6f318727fbe30ab9145cb29e6df464c5fa/ruff-0.15.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:db5bd4d802415cca656dc1616070b725952d6ae95eb5d4831e49fbd94a38f75f", size = 10511117, upload-time = "2026-05-28T14:16:31.767Z" }, + { url = "https://files.pythonhosted.org/packages/9d/96/6ff689e1f7e375d1d97075eca022f74c2bab59554a432fe4d2e6f091986a/ruff-0.15.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:587a6278ed42059191c1a466e490bd7930fb50bd2e255398bc29616c895a61cb", size = 10994867, upload-time = "2026-05-28T14:16:35.149Z" }, + { url = "https://files.pythonhosted.org/packages/c3/c2/5dce0ab9f92a8d534fa62b9bf9caca3eddb8c1a81b616f5e195ada4f0d6e/ruff-0.15.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df0c1c084f5f4be9812f61518a45c440d3c30d69ce4bf6c5270e66d38338f02a", size = 11482101, upload-time = "2026-05-28T14:16:49.598Z" }, + { url = "https://files.pythonhosted.org/packages/b1/c0/1003b60edd697c649faf61f1a34094b1abb38fb3d1181e3f895781250a08/ruff-0.15.15-py3-none-win32.whl", hash = "sha256:29428ea79694afbe756d45fd59b36f22b6b020dc0443cf7de0173046236964b9", size = 10716774, upload-time = "2026-05-28T14:16:52.337Z" }, + { url = "https://files.pythonhosted.org/packages/02/a8/1269eddd6945a06c23f055ef7848886e37cf9d6a8bebb386a3115f01470c/ruff-0.15.15-py3-none-win_amd64.whl", hash = "sha256:8df0323902e15e24bc4bf246da830573d3cf3352bd0b9a164eab335d111ff4a4", size = 11868463, upload-time = "2026-05-28T14:16:11.333Z" }, + { url = "https://files.pythonhosted.org/packages/4e/b2/920464c907b191e37469d477a1aa8bc048b8f36c4c1610dfa4ab87b39e18/ruff-0.15.15-py3-none-win_arm64.whl", hash = "sha256:3c8ceca6792f38196b8f589bc92eccd03eef286602da92e5dc05cc42ef6441b7", size = 11138498, upload-time = "2026-05-28T14:16:38.425Z" }, +] + [[package]] name = "secretstorage" version = "3.5.0" From af13c510758007b70be509e64b4904b5e2a4e66c Mon Sep 17 00:00:00 2001 From: Andrea Bruno Date: Wed, 3 Jun 2026 18:27:15 +0200 Subject: [PATCH 2/2] Add example --- README.md | 37 ++++++++++++++++++++++++++++++++ examples/pizza_agent.py | 24 +++++++++++++++++++++ src/deepagents_docker/backend.py | 25 +++++++-------------- tests/test_docker_sandbox.py | 2 +- 4 files changed, 70 insertions(+), 18 deletions(-) create mode 100644 examples/pizza_agent.py diff --git a/README.md b/README.md index 45d08cb..4e85561 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,43 @@ with DockerSandbox() as backend: print("Done!") ``` +## Example + +The [pizza agent](examples/pizza_agent.py) searches the web for a Neapolitan pizza recipe and writes it to a file in the workspace: + +```python +from deepagents import create_deep_agent +from deepagents_docker import DockerSandbox + +backend = DockerSandbox( + workspace_dir="examples/data", + allow_outbound_traffic=True, +) + +agent = create_deep_agent( + model="openai:gpt-5.5", + backend=backend, + system_prompt="You are a pizza chef.", +) + +for step in agent.stream( + {"messages": "Find the best neapolitan pizza recipe and write it to the recipe.md file."}, + stream_mode="updates", +): + for update in step.values(): + if update and (messages := update.get("messages")): + for message in messages: + message.pretty_print() +``` + +From a clone of this repo (requires an OpenAI API key): + +```bash +uv run python examples/pizza_agent.py +``` + +The agent writes `recipe.md` under `examples/data/`. + ## Development ```bash diff --git a/examples/pizza_agent.py b/examples/pizza_agent.py new file mode 100644 index 0000000..a77adbc --- /dev/null +++ b/examples/pizza_agent.py @@ -0,0 +1,24 @@ +from deepagents import create_deep_agent + +from deepagents_docker import DockerSandbox + +backend = DockerSandbox( + workspace_dir="examples/data", + allow_outbound_traffic=True, +) + +agent = create_deep_agent( + model="openai:gpt-5.5", + backend=backend, + system_prompt="You are a pizza chef.", +) + +if __name__ == "__main__": + for step in agent.stream( + {"messages": "Find the best neapolitan pizza recipe and write it to the recipe.md file."}, + stream_mode="updates", + ): + for update in step.values(): + if update and (messages := update.get("messages")): + for message in messages: + message.pretty_print() diff --git a/src/deepagents_docker/backend.py b/src/deepagents_docker/backend.py index 72a83ab..15f09b4 100644 --- a/src/deepagents_docker/backend.py +++ b/src/deepagents_docker/backend.py @@ -25,18 +25,7 @@ class DockerSandbox(FilesystemBackend, SandboxBackendProtocol): - """Filesystem backend with shell commands executed inside a Docker container. - - File operations (`ls`, `read`, `write`, `edit`, `grep`, `glob`) run against a - dedicated workspace directory on the host via `FilesystemBackend` with - `virtual_mode=True`. The same directory is bind-mounted into the container at - `/workspace`, and the `execute` tool runs commands there with Docker resource - and security limits. - - This is defense in depth, not a perfect isolation boundary. Do not mount - secrets into the workspace, keep Docker patched, and prefer microVMs for - hostile multi-tenant workloads. - """ + """Docker-backed sandbox backend for DeepAgents.""" def __init__( self, @@ -46,8 +35,8 @@ def __init__( workspace_dir: str | Path | None = None, timeout: int = DEFAULT_EXECUTE_TIMEOUT, max_output_bytes: int = 100_000, - memory: str = "512m", - cpus: float = 1.0, + memory: str = "256m", + cpus: float = 0.5, pids_limit: int = 128, auto_remove: bool = True, extra_run_args: list[str] | None = None, @@ -61,7 +50,7 @@ def __init__( created when omitted. timeout: Default command timeout in seconds. max_output_bytes: Maximum combined stdout/stderr captured per command. - memory: Docker memory limit (for example ``"512m"``). + memory: Docker memory limit (for example ``"256m"``). cpus: Docker CPU limit. pids_limit: Maximum number of PIDs inside the container. auto_remove: Remove the container on ``close()``. @@ -145,9 +134,9 @@ def _start_container(self) -> None: "ALL", "--read-only", "--tmpfs", - "/tmp:rw,noexec,nosuid,size=64m", + "/tmp:rw,noexec,nosuid,size=512m", "--tmpfs", - "/var/tmp:rw,noexec,nosuid,size=64m", + "/var/tmp:rw,noexec,nosuid,size=512m", "-v", f"{self._workspace}:{CONTAINER_WORKDIR}:rw", "-w", @@ -197,6 +186,8 @@ def execute( wrapped = self._wrap_command(command) docker_args = [ "exec", + "-w", + CONTAINER_WORKDIR, self._container_name, "sh", "-c", diff --git a/tests/test_docker_sandbox.py b/tests/test_docker_sandbox.py index 45fccaa..22c4ebe 100644 --- a/tests/test_docker_sandbox.py +++ b/tests/test_docker_sandbox.py @@ -207,7 +207,7 @@ def test_execute_wraps_command_and_returns_output( assert "hello" in result.output exec_args = run_docker.call_args_list[1][0][0] - assert exec_args[0] == "exec" + assert exec_args[:4] == ["exec", "-w", "/workspace", sandbox._container_name] shell_cmd = exec_args[-1] assert shell_cmd.startswith("cd /workspace && ") assert "echo hello" in shell_cmd