Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Continuous integration: lint with Ruff and run the test suite through uv.
name: CI

# Trigger on every pull request, and on pushes to the master or main branch.
on:
  pull_request:
  push:
    branches: [master, main]

jobs:
  # Static checks: Ruff lint rules plus a formatting check.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: astral-sh/setup-uv@v5

      - name: Install dependencies
        run: uv sync

      - name: Ruff check
        run: uv run ruff check .

      # --check reports unformatted files without rewriting them.
      - name: Ruff format
        run: uv run ruff format --check .

  # Run pytest once for each Python version listed in the matrix.
  test:
    runs-on: ubuntu-latest
    strategy:
      # Keep the other matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        python-version: ["3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4

      # Hand the matrix entry to setup-uv so this job uses that interpreter.
      - uses: astral-sh/setup-uv@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: uv sync

      - name: Run tests
        run: uv run pytest -v
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,43 @@ with DockerSandbox() as backend:
print("Done!")
```

## Example

The [pizza agent](examples/pizza_agent.py) searches the web for a Neapolitan pizza recipe and writes it to a file in the workspace:

```python
from deepagents import create_deep_agent
from deepagents_docker import DockerSandbox

backend = DockerSandbox(
workspace_dir="examples/data",
allow_outbound_traffic=True,
)

agent = create_deep_agent(
model="openai:gpt-5.5",
backend=backend,
system_prompt="You are a pizza chef.",
)

for step in agent.stream(
{"messages": "Find the best neapolitan pizza recipe and write it to the recipe.md file."},
stream_mode="updates",
):
for update in step.values():
if update and (messages := update.get("messages")):
for message in messages:
message.pretty_print()
```

To run the example from a clone of this repo (requires an OpenAI API key and a working Docker installation):

```bash
uv run python examples/pizza_agent.py
```

The agent writes `recipe.md` under `examples/data/`.

## Development

```bash
Expand Down
24 changes: 24 additions & 0 deletions examples/pizza_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from deepagents import create_deep_agent

from deepagents_docker import DockerSandbox

# Sandbox whose workspace is the local ``examples/data`` directory; outbound
# network traffic is enabled for the agent's commands.
backend = DockerSandbox(
    workspace_dir="examples/data",
    allow_outbound_traffic=True,
)

# Deep agent wired to the Docker-backed sandbox above.
agent = create_deep_agent(
    model="openai:gpt-5.5",
    backend=backend,
    system_prompt="You are a pizza chef.",
)


def main() -> None:
    """Stream one agent run and pretty-print every message it produces."""
    request = {
        "messages": "Find the best neapolitan pizza recipe and write it to the recipe.md file."
    }
    for chunk in agent.stream(request, stream_mode="updates"):
        # Each streamed chunk is a mapping; only its values are inspected.
        for payload in chunk.values():
            # Skip empty/None payloads before looking for messages.
            if not payload:
                continue
            new_messages = payload.get("messages")
            if not new_messages:
                continue
            for msg in new_messages:
                msg.pretty_print()


if __name__ == "__main__":
    main()
16 changes: 16 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
"deepagents>=0.6.7",
Expand All @@ -28,6 +29,7 @@ dependencies = [
dev = [
"build>=1.2.0",
"pytest>=9.0.0",
"ruff>=0.9.0",
"twine>=6.0.0",
]

Expand All @@ -41,4 +43,18 @@ packages = ["src/deepagents_docker"]
[tool.pytest.ini_options]
testpaths = ["tests"]

[tool.ruff]
target-version = "py312"
line-length = 100

[tool.ruff.lint]
select = [
# isort
"I",

# Pyflakes
"F",

# Pyupgrade
"UP",
]
7 changes: 3 additions & 4 deletions src/deepagents_docker/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Docker-backed sandbox backend for DeepAgents."""

from deepagents_docker.backend import (
DockerSandbox,
)
from .backend import DockerSandbox
from .errors import DockerError

__all__ = ["DockerSandbox"]
__all__ = ["DockerError", "DockerSandbox"]
4 changes: 1 addition & 3 deletions src/deepagents_docker/_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
from collections.abc import Sequence
from dataclasses import dataclass


class DockerError(RuntimeError):
"""Raised when a Docker CLI invocation fails."""
from .errors import DockerError


@dataclass(frozen=True)
Expand Down
82 changes: 35 additions & 47 deletions src/deepagents_docker/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,28 @@

import atexit
import shlex
import subprocess
import tempfile
import uuid
from pathlib import Path

from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import ExecuteResponse, SandboxBackendProtocol

from deepagents_docker._docker import (
DockerError,
from ._docker import (
docker_available,
format_docker_error,
inspect_container_id,
run_docker,
)
from .errors import DockerError

DEFAULT_EXECUTE_TIMEOUT = 120
DEFAULT_IMAGE = "python:3.12-bookworm"
CONTAINER_WORKDIR = "/workspace"


class DockerSandbox(FilesystemBackend, SandboxBackendProtocol):
"""Filesystem backend with shell commands executed inside a Docker container.

File operations (`ls`, `read`, `write`, `edit`, `grep`, `glob`) run against a
dedicated workspace directory on the host via `FilesystemBackend` with
`virtual_mode=True`. The same directory is bind-mounted into the container at
`/workspace`, and the `execute` tool runs commands there with Docker resource
and security limits.

This is defense in depth, not a perfect isolation boundary. Do not mount
secrets into the workspace, keep Docker patched, and prefer microVMs for
hostile multi-tenant workloads.
"""
"""Docker-backed sandbox backend for DeepAgents."""

def __init__(
self,
Expand All @@ -47,8 +35,8 @@ def __init__(
workspace_dir: str | Path | None = None,
timeout: int = DEFAULT_EXECUTE_TIMEOUT,
max_output_bytes: int = 100_000,
memory: str = "512m",
cpus: float = 1.0,
memory: str = "256m",
cpus: float = 0.5,
pids_limit: int = 128,
auto_remove: bool = True,
extra_run_args: list[str] | None = None,
Expand All @@ -57,14 +45,14 @@ def __init__(

Args:
image: Docker image for command execution (default: official ``python:3.12-bookworm``).
allow_outbound_traffic: Allow/deny outbound network traffic (default: allow).
workspace_dir: Host directory for agent files. A temporary directory is
created when omitted.
timeout: Default command timeout in seconds.
max_output_bytes: Maximum combined stdout/stderr captured per command.
memory: Docker memory limit (for example ``"512m"``).
memory: Docker memory limit (for example ``"256m"``).
cpus: Docker CPU limit.
pids_limit: Maximum number of PIDs inside the container.
outbound_traffic: Allow/deny outbound network traffic (default: allow).
auto_remove: Remove the container on ``close()``.
extra_run_args: Additional ``docker run`` flags appended before the image.
"""
Expand Down Expand Up @@ -146,9 +134,9 @@ def _start_container(self) -> None:
"ALL",
"--read-only",
"--tmpfs",
"/tmp:rw,noexec,nosuid,size=64m",
"/tmp:rw,noexec,nosuid,size=512m",
"--tmpfs",
"/var/tmp:rw,noexec,nosuid,size=64m",
"/var/tmp:rw,noexec,nosuid,size=512m",
"-v",
f"{self._workspace}:{CONTAINER_WORKDIR}:rw",
"-w",
Expand Down Expand Up @@ -198,41 +186,41 @@ def execute(
wrapped = self._wrap_command(command)
docker_args = [
"exec",
"-w",
CONTAINER_WORKDIR,
self._container_name,
"sh",
"-c",
wrapped,
]

try:
completed = subprocess.run( # noqa: S602
["docker", *docker_args],
check=False,
capture_output=True,
text=True,
timeout=effective_timeout,
)
except subprocess.TimeoutExpired:
if timeout is not None:
msg = (
f"Error: Command timed out after {effective_timeout} seconds "
"(custom timeout). The command may be stuck or require more time."
completed = run_docker(docker_args, timeout=effective_timeout)
except DockerError as exc:
detail = str(exc)
if "timed out" in detail:
if timeout is not None:
msg = (
f"Error: Command timed out after {effective_timeout} seconds "
"(custom timeout). The command may be stuck or require more time."
)
else:
msg = (
f"Error: Command timed out after {effective_timeout} seconds. "
"For long-running commands, re-run using the timeout parameter."
)
return ExecuteResponse(output=msg, exit_code=124, truncated=False)
if "not found on PATH" in detail:
return ExecuteResponse(
output=(
"Error executing command (FileNotFoundError): "
"docker executable not found on PATH"
),
exit_code=1,
truncated=False,
)
else:
msg = (
f"Error: Command timed out after {effective_timeout} seconds. "
"For long-running commands, re-run using the timeout parameter."
)
return ExecuteResponse(output=msg, exit_code=124, truncated=False)
except FileNotFoundError:
return ExecuteResponse(
output="Error executing command (FileNotFoundError): docker executable not found on PATH",
exit_code=1,
truncated=False,
)
except Exception as exc: # noqa: BLE001
return ExecuteResponse(
output=f"Error executing command ({type(exc).__name__}): {exc}",
output=f"Error executing command (DockerError): {exc}",
exit_code=1,
truncated=False,
)
Expand Down
2 changes: 2 additions & 0 deletions src/deepagents_docker/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class DockerError(RuntimeError):
    """Raised when a Docker CLI invocation fails.

    Subclasses ``RuntimeError`` and adds no attributes or behavior of its
    own. Callers in this package inspect ``str(exc)`` to tell failure modes
    apart (for example a timeout versus a missing ``docker`` executable), so
    the message text is the only detail the exception carries.
    """
Loading
Loading