From 86e1839dd7182207f63e028c2c1e8e3184095bb7 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Wed, 24 Jun 2026 13:33:20 -0700 Subject: [PATCH] docs: add sandbox API guide Signed-off-by: Hemil Desai --- .../latest/pages/api-reference/index.mdx | 1 + .../engineering-notes/index.mdx | 2 +- .../latest/pages/infrastructure/index.mdx | 6 + .../sandbox/adding-a-provider.mdx | 114 ++++++++ .../pages/infrastructure/sandbox/index.mdx | 253 ++++++++++++++++++ 5 files changed, 375 insertions(+), 1 deletion(-) create mode 100644 fern/versions/latest/pages/infrastructure/sandbox/adding-a-provider.mdx create mode 100644 fern/versions/latest/pages/infrastructure/sandbox/index.mdx diff --git a/fern/versions/latest/pages/api-reference/index.mdx b/fern/versions/latest/pages/api-reference/index.mdx index e8b309d1bc..9692a94c36 100644 --- a/fern/versions/latest/pages/api-reference/index.mdx +++ b/fern/versions/latest/pages/api-reference/index.mdx @@ -17,3 +17,4 @@ This reference is built from docstrings in the [source code](https://github.com/ | `nemo_gym.config_types` | Pydantic configuration models for servers, datasets, and CLI | | `nemo_gym.server_utils` | Server utilities, HTTP client, and middleware | | `nemo_gym.openai_utils` | OpenAI API client wrapper | +| `nemo_gym.sandbox` | Provider-neutral sandbox API for isolated command execution and file transfer | diff --git a/fern/versions/latest/pages/infrastructure/engineering-notes/index.mdx b/fern/versions/latest/pages/infrastructure/engineering-notes/index.mdx index ce48f79b15..d86cc4efb0 100644 --- a/fern/versions/latest/pages/infrastructure/engineering-notes/index.mdx +++ b/fern/versions/latest/pages/infrastructure/engineering-notes/index.mdx @@ -1,7 +1,7 @@ --- title: "Engineering Notes" description: "" -position: 3 +position: 4 --- Technical notes that document infrastructure decisions, performance investigations, and design rationale behind NeMo Gym. 
diff --git a/fern/versions/latest/pages/infrastructure/index.mdx b/fern/versions/latest/pages/infrastructure/index.mdx index 9509ca0aa2..36ad3d7f81 100644 --- a/fern/versions/latest/pages/infrastructure/index.mdx +++ b/fern/versions/latest/pages/infrastructure/index.mdx @@ -13,6 +13,12 @@ Server deployment patterns and training framework integration. deployment topology training-integration + +Provider-neutral isolated execution for agents, resources servers, and benchmark harnesses. + +execution sandbox providers + + Technical notes on infrastructure decisions and design rationale. diff --git a/fern/versions/latest/pages/infrastructure/sandbox/adding-a-provider.mdx b/fern/versions/latest/pages/infrastructure/sandbox/adding-a-provider.mdx new file mode 100644 index 0000000000..bbdfae93ad --- /dev/null +++ b/fern/versions/latest/pages/infrastructure/sandbox/adding-a-provider.mdx @@ -0,0 +1,114 @@ +--- +title: "Adding a Sandbox Provider" +description: "Implement and register a sandbox runtime backend for NeMo Gym." +position: 2 +--- + +Add a provider when NeMo Gym needs to create sandboxes through a new runtime backend, such as a container service, HPC isolation layer, or in-house execution platform. The public `AsyncSandbox` and `Sandbox` facades stay the same; the provider owns runtime-specific create, command, file transfer, status, and cleanup behavior. + +## Provider Contract + +Providers implement the `SandboxProvider` protocol from `nemo_gym.sandbox.providers.base`. Keep common caller fields on `SandboxSpec`; put backend-specific options in `SandboxSpec.provider_options`. 
+ +```python +from pathlib import Path + +from nemo_gym.sandbox.providers.base import ( + SandboxExecResult, + SandboxHandle, + SandboxSpec, + SandboxStatus, +) + + +class MySandboxProvider: + name = "my_provider" + + async def create(self, spec: SandboxSpec) -> SandboxHandle: + raw = await my_runtime_create(spec) + return SandboxHandle(sandbox_id=raw.id, provider_name=self.name, raw=raw) + + async def exec( + self, + handle: SandboxHandle, + command: str, + *, + cwd: str | None = None, + env: dict[str, str] | None = None, + timeout_s: int | float | None = None, + user: str | int | None = None, + ) -> SandboxExecResult: + result = await handle.raw.run(command, cwd=cwd, env=env, timeout_s=timeout_s, user=user) + return SandboxExecResult( + stdout=result.stdout, + stderr=result.stderr, + return_code=result.return_code, + ) + + async def upload_file(self, handle: SandboxHandle, source_path: Path, target_path: str) -> None: + await handle.raw.upload(source_path, target_path) + + async def download_file(self, handle: SandboxHandle, source_path: str, target_path: Path) -> None: + await handle.raw.download(source_path, target_path) + + async def status(self, handle: SandboxHandle) -> SandboxStatus: + return SandboxStatus.RUNNING + + async def close(self, handle: SandboxHandle) -> None: + await handle.raw.stop() + + async def aclose(self) -> None: + return None +``` + +Provider implementations should preserve the same lifecycle contract as the built-in provider: + +- Return a `SandboxHandle` from `create()` only after the sandbox is ready enough to run commands and transfer files. +- Return command status through `SandboxExecResult` for process exits, including nonzero exits. +- Raise `SandboxCreateError` or `SandboxCreateVerificationError` for sandbox allocation and readiness failures. +- Make `close()` safe to call from cleanup paths. +- Use `aclose()` for provider-scoped resources such as SDK clients. 
+ +## Registry + +The registry in `nemo_gym.sandbox.providers.registry` maps provider names from config to provider classes. External packages and tests can register a provider directly: + +```python +from nemo_gym.sandbox.providers.registry import register_provider + + +register_provider("my_provider", MySandboxProvider) +``` + +In-tree built-in providers should use a lazy loader in `registry.py` so importing `nemo_gym.sandbox` does not eagerly import optional provider dependencies. + +```python +def _load_my_provider() -> ProviderClass: + from nemo_gym.sandbox.providers.my_provider import MySandboxProvider + + return MySandboxProvider + + +_BUILTIN_PROVIDER_LOADERS["my_provider"] = _load_my_provider +``` + +After registration, callers select the provider with a single-key provider config: + +```python +provider_config = { + "my_provider": { + "provider_setting": "value", + } +} +``` + +## Provider Pages + +Each provider page should use the same shape so users can compare backends quickly: + +- Setup and optional dependencies +- Provider config fields +- `provider_options` accepted by `SandboxSpec` +- Resource mapping and isolation properties +- Minimal `ng_run` or local first-run example +- Provider-specific troubleshooting diff --git a/fern/versions/latest/pages/infrastructure/sandbox/index.mdx b/fern/versions/latest/pages/infrastructure/sandbox/index.mdx new file mode 100644 index 0000000000..7b74197e03 --- /dev/null +++ b/fern/versions/latest/pages/infrastructure/sandbox/index.mdx @@ -0,0 +1,253 @@ +--- +title: "Sandbox API" +description: "Use the provider-neutral sandbox module for isolated execution in NeMo Gym agents and environments." +position: 3 +--- + +The `nemo_gym.sandbox` module is the provider-neutral interface for creating isolated execution environments, running commands, and moving files in or out of those environments. 
It gives agents and resources servers one caller-facing API while provider pages document backend-specific setup, configuration, and isolation properties. + +Import from the public package boundary: + +```python +from nemo_gym.sandbox import AsyncSandbox, Sandbox, SandboxResources, SandboxSpec +``` + + +Treat `nemo_gym.sandbox` as the stable caller-facing API. Provider modules under `nemo_gym.sandbox.providers` are implementation details unless you are adding or configuring a provider. + + + + + +Implement the `SandboxProvider` protocol and register a new runtime backend. + +contributors providers + + + + +## Install Provider Dependencies + +The public API is part of `nemo-gym`. Runtime backends can have optional dependencies, so install the extras required by the provider you configure in your agent or resources server. + +## When to Use It + +Use `nemo_gym.sandbox` when a rollout needs a per-task filesystem, a container-backed command runner, or an execution boundary for benchmark harnesses. Common examples include code execution, repository-based software tasks, tool environments that need scratch state, and verifier logic that should run away from the long-lived server process. + +If a task only needs a pure Python verifier with no external process, no mutable filesystem, and no isolation boundary, call that verifier directly from the resources server instead. + +## Core Types + +| API | Purpose | +| --- | --- | +| `AsyncSandbox` | Async facade for FastAPI servers, async agents, and rollout code. Use this inside async code. | +| `Sandbox` | Sync facade for synchronous harnesses. It owns a private event loop and rejects calls from an already-running async loop. | +| `SandboxSpec` | Provider-neutral sandbox creation request. Includes image, TTL, working directory, files, metadata, resources, entrypoint, and provider options. | +| `SandboxResources` | Typed resource request with CPU, memory, disk, and GPU fields. 
| +| `SandboxExecResult` | Command result with `stdout`, `stderr`, `return_code`, and optional `error_type`. | +| `SandboxStatus` | Provider-neutral lifecycle status: `starting`, `running`, `stopped`, `error`, or `unknown`. | + +## First-Run Example + +Create a small local script, for example `first_sandbox.py`, after your provider is available. Replace the provider name and settings with the backend configured for your environment. + +```python +import asyncio + +from nemo_gym.sandbox import AsyncSandbox, SandboxResources, SandboxSpec + + +provider_config = { + "opensandbox": {}, +} + +spec = SandboxSpec( + image="python:3.12-slim", + ttl_s=1800, + ready_timeout_s=300, + workdir="/workspace", + files={ + "/workspace/hello.py": "print('hello from sandbox')\n", + }, + resources=SandboxResources(cpu=1, memory_mib=1024, disk_gib=5), + metadata={"example": "first-run"}, +) + + +async def main() -> None: + async with AsyncSandbox(provider_config, spec) as sandbox: + await sandbox.start() + result = await sandbox.exec("python /workspace/hello.py", timeout_s=60) + print(result.stdout or result.stderr) + raise SystemExit(result.return_code) + + +asyncio.run(main()) +``` + +Run it from your local checkout or application environment: + +```bash +python first_sandbox.py +``` + +`exec()` returns a `SandboxExecResult`. Nonzero process exits are reported in `return_code`; providers should reserve exceptions for sandbox runtime failures such as allocation, transport, or lifecycle errors. + +## Lifecycle + +`AsyncSandbox` and `Sandbox` are lifecycle objects. Construct one with a provider config and optional `SandboxSpec`, call `start()`, run commands or transfer files, then call `stop()`. Context managers close the provider on exit, but they do not start the sandbox automatically. 
+ +```python +from nemo_gym.sandbox import AsyncSandbox, SandboxResources, SandboxSpec + + +spec = SandboxSpec( + image="ghcr.io/example/eval-image:py312", + ttl_s=18000, + ready_timeout_s=1200, + workdir="/workspace", + resources=SandboxResources(cpu=2, memory_mib=8192, disk_gib=20), + metadata={"benchmark": "my-benchmark", "task_id": "task-001"}, +) + +async with AsyncSandbox(provider_config, spec) as sandbox: + await sandbox.start() + result = await sandbox.exec( + "python -m pytest -q", + timeout_s=600, + user="root", + ) + passed = result.return_code == 0 +``` + +## Sync vs. Async + +Use `AsyncSandbox` inside FastAPI handlers, async resources servers, async agents, and rollout collection code. + +Use `Sandbox` only in synchronous code, such as a third-party harness adapter that does not expose async hooks. + +```python +from nemo_gym.sandbox import Sandbox, SandboxSpec + + +with Sandbox(provider_config, SandboxSpec(image="ghcr.io/example/eval-image:py312")) as sandbox: + sandbox.start() + result = sandbox.exec("python --version", timeout_s=30) + output = "\n".join(part for part in (result.stdout, result.stderr) if part) +``` + + +Do not call `Sandbox` from FastAPI handlers, async resources servers, or async agents. It blocks the caller by design. Use `AsyncSandbox` in async code. + + +## SandboxSpec Fields + +`SandboxSpec` is intentionally provider-neutral. Providers map these fields onto their own runtime primitives. + +| Field | Description | +| --- | --- | +| `image` | Container image to create the sandbox from. | +| `ttl_s` | Sandbox lifetime in seconds, when supported by the provider. | +| `ready_timeout_s` | Maximum time, in seconds, to wait for sandbox readiness. | +| `workdir` | Default working directory for `exec()` calls. | +| `env` | Environment variables injected into the sandbox. Forward only values required by the task. | +| `files` | Text files to upload at startup, keyed by remote target path. | +| `metadata` | String metadata for tracing, debugging, and backend labels. 
Providers may normalize values for their runtime. | +| `resources` | `SandboxResources` or a mapping with `cpu`, `memory_mib`, `disk_gib`, `gpu`, and `gpu_type`. | +| `entrypoint` | Optional container entrypoint override. | +| `provider_options` | Provider-specific options that do not fit the common schema. | + +You can pass resources as either a `SandboxResources` instance or a mapping: + +```python +spec = SandboxSpec( + image="ghcr.io/example/eval-image:py312", + resources={ + "cpu": 2, + "memory_mib": 8192, + "disk_gib": 20, + }, +) +``` + +Unknown resource keys raise a `ValueError`, which catches config drift early. + +## Startup Files and File Transfer + +Use `files` for small text files that should exist before the first command runs: + +```python +spec = SandboxSpec( + image="ghcr.io/example/eval-image:py312", + workdir="/workspace", + files={ + "/workspace/input.txt": "hello\n", + }, +) +``` + +Use `upload()` and `download()` for local files: + +```python +await sandbox.upload(local_path, "/workspace/archive.tar.gz") +await sandbox.download("/workspace/log.txt", output_path) +``` + +`upload()` and `download()` operate on files. If you need structured values, serialize them locally before uploading and parse the downloaded file locally after the sandbox command completes. + +## Status and Cleanup + +Call `status()` when a runner needs to distinguish a stopped sandbox from a provider error: + +```python +status = await sandbox.status() +if status.value == "error": + ... +``` + +Always stop sandboxes in cleanup paths. `stop()` is idempotent on the public facade and closes provider-scoped resources after ending the sandbox lifecycle. + +```python +sandbox = AsyncSandbox(provider_config, spec) +try: + await sandbox.start() + result = await sandbox.exec("pytest -q", timeout_s=600) +finally: + await sandbox.stop() +``` + +## Image Rewrites + +Use `rewrite_image()` when a benchmark's upstream image needs to run through an internal registry mirror. 
+ +```python +from nemo_gym.sandbox import rewrite_image + + +image = rewrite_image( + "docker.io/library/python:3.12-slim", + [{"from": "docker.io/", "to": "mirror.example.com/dockerhub/"}], +) +``` + +Rewrites are ordered. The first matching `from` prefix wins. + +## Error Handling + +Sandbox create failures use provider-neutral exception classes: + +- `SandboxCreateError` for sandbox allocation or readiness failures. +- `SandboxCreateVerificationError` when a created sandbox fails Gym readiness verification. + +In resources servers and agents, catch these errors close to the sandbox operation and return a meaningful verifier or rollout error. Do not let one bad sandbox allocation crash a long-running server. + +```python +from nemo_gym.sandbox import AsyncSandbox, SandboxCreateError + + +sandbox = AsyncSandbox(provider_config, spec) +try: + await sandbox.start() +except SandboxCreateError as error: + return {"reward": 0.0, "error": f"sandbox_create_failed: {error}"} +```