From 41167a4b9d7b8921fec2add4b4bf82f14b645c80 Mon Sep 17 00:00:00 2001 From: Ananth Subramaniam Date: Wed, 24 Jun 2026 01:27:46 -0700 Subject: [PATCH 1/6] feat(sandbox): decouple sandbox provider config from agent config Sandbox providers are now defined as named blocks in their own config files (e.g. nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml) that agents reference by name (sandbox_provider: sandbox). Swapping providers becomes swapping one config path in +config_paths, with no edits to the agent config. - Add resolve_provider_config to resolve a sandbox name (or an inline single-key mapping) to a single provider config. - Make mini_swe_agent_2's config provider-neutral and resolve the reference at runtime. - Document single / swap / multiple-sandbox usage, including distinct instance names for mixing providers or running two configs of the same provider type. Refs #1377 Signed-off-by: Ananth Subramaniam --- nemo_gym/sandbox/__init__.py | 2 + nemo_gym/sandbox/config.py | 117 ++++++++++++ .../opensandbox/configs/opensandbox.yaml | 41 +++++ .../mini_swe_agent_2/README.md | 171 ++++++++++++++---- responses_api_agents/mini_swe_agent_2/app.py | 10 +- ...opensandbox.yaml => mini_swe_agent_2.yaml} | 40 ++-- .../mini_swe_agent_2/tests/test_app.py | 49 +++++ tests/unit_tests/test_sandbox.py | 47 +++++ 8 files changed, 412 insertions(+), 65 deletions(-) create mode 100644 nemo_gym/sandbox/config.py create mode 100644 nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml rename responses_api_agents/mini_swe_agent_2/configs/{mini_swe_agent_opensandbox.yaml => mini_swe_agent_2.yaml} (52%) diff --git a/nemo_gym/sandbox/__init__.py b/nemo_gym/sandbox/__init__.py index 820fcbff71..25e613ed43 100644 --- a/nemo_gym/sandbox/__init__.py +++ b/nemo_gym/sandbox/__init__.py @@ -15,6 +15,7 @@ """Public sandbox API for NeMo Gym.""" from nemo_gym.sandbox.api import AsyncSandbox, Sandbox +from nemo_gym.sandbox.config import resolve_provider_config from nemo_gym.sandbox.providers import ( ExecResult, SandboxCreateError, @@ -49,5 +50,6 @@ "get_provider_class", "list_providers", "register_provider", + "resolve_provider_config", "rewrite_image", ] diff --git a/nemo_gym/sandbox/config.py b/nemo_gym/sandbox/config.py new file mode 100644 index 0000000000..6aa0c43580 --- /dev/null +++ b/nemo_gym/sandbox/config.py @@ -0,0 +1,117 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Resolve a sandbox provider reference into a provider config. + +An agent selects a sandbox by name (``sandbox_provider: sandbox``). The named +block lives in its own provider config file, so swapping providers is swapping a +``config_paths`` entry, not editing the agent config:: + + # nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml + sandbox: + opensandbox: + connection: { ... } + + # agent config + sandbox_provider: sandbox + +An inline single-key mapping (``{provider_name: {...}}``) is also accepted for +keeping everything in one file. +""" + +from collections.abc import Mapping +from typing import Any + + +def _to_plain_dict(value: Any) -> Any: + """Return a plain ``dict`` for mappings, including OmegaConf ``DictConfig``.""" + try: + from omegaconf import DictConfig, OmegaConf + except ImportError: # pragma: no cover - omegaconf is a core dependency + DictConfig = () # type: ignore[assignment] + OmegaConf = None # type: ignore[assignment] + + if OmegaConf is not None and isinstance(value, DictConfig): + return OmegaConf.to_container(value, resolve=True) + if isinstance(value, Mapping): + return dict(value) + return value + + +def _candidate_sandbox_names(named_configs: Mapping[str, Any] | None) -> list[str]: + """List top-level config keys that look like named sandbox provider blocks.""" + if not named_configs: + return [] + candidates: list[str] = [] + for key, value in named_configs.items(): + plain = _to_plain_dict(value) + if isinstance(plain, Mapping) and len(plain) == 1: + candidates.append(str(key)) + return sorted(candidates) + + +def resolve_provider_config( + sandbox_provider: str | Mapping[str, Any], + named_configs: Mapping[str, Any] | None = None, +) -> dict[str, Any]: + """Resolve a ``sandbox_provider`` field into a single-key provider config dict. + + Args: + sandbox_provider: Either the name of a top-level sandbox config block + (resolved from ``named_configs``) or an inline single-key provider + mapping of the form ``{provider_name: {...}}``. + named_configs: Mapping of top-level config name to config block, typically + the merged global config dict. Required when ``sandbox_provider`` is a + name reference. + + Returns: + A plain ``{provider_name: provider_kwargs}`` dict suitable for + :func:`nemo_gym.sandbox.create_provider`. + + Raises: + TypeError: If ``sandbox_provider`` is neither a string nor a mapping. + ValueError: If a named reference cannot be found, or if the resolved block + is not a single-key provider mapping. + """ + if isinstance(sandbox_provider, str): + name = sandbox_provider + if not name: + raise ValueError("Sandbox provider reference must be a non-empty string") + block = named_configs.get(name) if named_configs is not None else None + if block is None: + available = ", ".join(repr(n) for n in _candidate_sandbox_names(named_configs)) or "(none)" + raise ValueError( + f"Sandbox provider reference {name!r} is not defined in the merged config. " + f"Define a top-level '{name}:' block (e.g. via " + f"nemo_gym/sandbox/providers//configs/.yaml) and include it in " + f"your config_paths. Available sandbox configs: {available}" + ) + block = _to_plain_dict(block) + source = f"reference {name!r}" + elif isinstance(sandbox_provider, Mapping): + block = _to_plain_dict(sandbox_provider) + source = "inline sandbox_provider config" + else: + raise TypeError( + "sandbox_provider must be a name reference (str) or a single-key provider mapping, " + f"got {type(sandbox_provider).__name__}" + ) + + if not isinstance(block, Mapping) or len(block) != 1: + raise ValueError( + f"Sandbox provider config from {source} must be a single-key mapping " + f"{{provider_name: config}}, got: {block!r}" + ) + + return dict(block) diff --git a/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml b/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml new file mode 100644 index 0000000000..8345100092 --- /dev/null +++ b/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml @@ -0,0 +1,41 @@ +# OpenSandbox sandbox provider config. +# +# `sandbox` is the instance name an agent references via `sandbox_provider: +# sandbox`; the child key `opensandbox` selects the provider class and its value +# is passed to the provider constructor. +# +# Every shipped provider config binds the same name `sandbox`, so swapping +# providers is swapping this config path in `+config_paths` (no agent edit): +# +# AGENT=responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml +# MODEL=responses_api_models/vllm_model/configs/vllm_model.yaml +# ng_run "+config_paths=[$AGENT, nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml, $MODEL]" +# +# To run multiple sandboxes at once, give each block a distinct instance name +# (e.g. `opensandbox_foo`, `opensandbox_baz`) and reference each by name. +sandbox: + opensandbox: + connection: + domain: ${oc.env:OPENSANDBOX_DOMAIN,opensandbox-server.opensandbox-system.svc.cluster.local} + api_key: ${oc.env:OPENSANDBOX_API_KEY} + protocol: http + request_timeout_s: 300 + use_server_proxy: true + create: + request_timeout_s: 1200 + timeout_s: 1200 + skip_health_check: true + retries: 10 + retry_delay_s: 5.0 + retry_max_delay_s: 90.0 + probe: + timeout_s: 60 + deadline_s: 180 + stable_count: 2 + stable_delay_s: 1.0 + operations: + retries: 5 + retry_delay_s: 1.0 + retry_max_delay_s: 45.0 + command_retries: 0 + close_timeout_s: 30 diff --git a/responses_api_agents/mini_swe_agent_2/README.md b/responses_api_agents/mini_swe_agent_2/README.md index 1170f8a215..51a5e5346a 100644 --- a/responses_api_agents/mini_swe_agent_2/README.md +++ b/responses_api_agents/mini_swe_agent_2/README.md @@ -96,9 +96,48 @@ rewrites. ## Configuration +### How sandboxes are configured + +There is one concept to learn: **a sandbox is a named block**, and an agent points +at it by name. + +```yaml +# A named sandbox: maps to maps to that provider's config. +sandbox: # instance name (the handle the agent references) + opensandbox: # provider registry key -> provider class + connection: { ... } # provider-specific config +``` + +```yaml +# An agent selects a sandbox by name: +sandbox_provider: sandbox +``` + +The framework only ever resolves *a name -> one provider config*. Everything else +falls out of how you name and reference blocks: + +- **Single sandbox (default).** Ship a `sandbox` block; the agent defaults to + `sandbox_provider: sandbox`. Done. +- **Swap providers (no agent edit).** Every shipped provider config binds the same + name `sandbox`, so swapping providers is just swapping one config path in + `+config_paths`. +- **Multiple / mixed / same-type sandboxes.** Give blocks **distinct instance + names** (e.g. `opensandbox_foo`, `opensandbox_baz`) and reference each by name. + See [Advanced: multiple sandboxes](#advanced-multiple-sandboxes). + +> Names are arbitrary instance names, not provider types. Two config files that +> bind the **same** name merge last-wins (that is the swap mechanism); to run +> several at once, use distinct names. + ### Agent Configuration -Path - `responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_opensandbox.yaml` +The agent config is **provider-neutral**: it selects a sandbox by name via +`sandbox_provider`, and the named block lives in a separate provider config file. +This decouples the agent from any specific sandbox provider so you can swap +providers by swapping a single config path in `+config_paths` — no edits to the +agent config. + +Path - `responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml` ```yaml mini_swe_agent_2: @@ -106,39 +145,16 @@ mini_swe_agent_2: mini_swe_agent_2: entrypoint: app.py domain: coding - description: Software engineering tasks driven by mini-swe-agent harness on OpenSandbox. + description: Software engineering tasks driven by mini-swe-agent harness on a Gym sandbox. value: Improve agentic software engineering capabilities. model_server: type: responses_api_models name: policy_model concurrency: 64 env: sandbox - sandbox_provider: - opensandbox: - connection: - domain: opensandbox-server.opensandbox-system.svc.cluster.local - api_key: ${oc.env:OPENSANDBOX_API_KEY} - protocol: http - request_timeout_s: 300 - use_server_proxy: true - create: - request_timeout_s: 1200 - timeout_s: 1200 - skip_health_check: true - retries: 10 - retry_delay_s: 5.0 - retry_max_delay_s: 90.0 - probe: - timeout_s: 60 - deadline_s: 180 - stable_count: 2 - stable_delay_s: 1.0 - operations: - retries: 5 - retry_delay_s: 1.0 - retry_max_delay_s: 45.0 - command_retries: 0 - close_timeout_s: 30 + # Name of the sandbox to use; defined in a separate provider config (see + # "Sandbox Provider Configuration" below). + sandbox_provider: sandbox sandbox_spec: ttl_s: 18000 ready_timeout_s: 1200 @@ -153,7 +169,6 @@ mini_swe_agent_2: metadata: benchmark: swebench-verified harness: mini-swe-agent - sandbox-api: opensandbox-sdk sandbox_environment_kwargs: cwd: /testbed conda_env: testbed @@ -166,12 +181,94 @@ mini_swe_agent_2: step_limit: 250 ``` +`sandbox_provider` accepts either a name reference (resolved from a top-level +sandbox block in the merged config, the recommended decoupled form) or an inline +single-key provider mapping (`{provider_name: {...}}`) when you prefer to keep +everything in one file. + +### Sandbox Provider Configuration + +Each provider ships its own config file that defines a named sandbox block. The +default OpenSandbox config is: + +Path - `nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml` + +```yaml +sandbox: # name referenced by the agent's sandbox_provider + opensandbox: # provider registry key -> provider class + connection: + domain: ${oc.env:OPENSANDBOX_DOMAIN,opensandbox-server.opensandbox-system.svc.cluster.local} + api_key: ${oc.env:OPENSANDBOX_API_KEY} + protocol: http + request_timeout_s: 300 + use_server_proxy: true + create: + request_timeout_s: 1200 + timeout_s: 1200 + skip_health_check: true + retries: 10 + retry_delay_s: 5.0 + retry_max_delay_s: 90.0 + probe: + timeout_s: 60 + deadline_s: 180 + stable_count: 2 + stable_delay_s: 1.0 + operations: + retries: 5 + retry_delay_s: 1.0 + retry_max_delay_s: 45.0 + command_retries: 0 + close_timeout_s: 30 +``` + +To use a different provider, add a config file under +`nemo_gym/sandbox/providers//configs/.yaml` that defines a +`sandbox` block (the name the agent references) with that provider's registry key, +then point `+config_paths` at it instead — no agent edit required. + Optional `sandbox_resource_profiles` can be configured as a list of resource maps. When present, the agent hashes `instance_id` and deterministically merges one profile into `sandbox_spec.resources`. This is useful for spreading SWE-bench tasks across a small set of resource sizes without changing the input data. +### Advanced: multiple sandboxes + +The default convention (every provider file binds the name `sandbox`) is optimized +for the single-sandbox case and path-only swapping. To run more than one sandbox +in the same `ng_run`, give each block a **distinct instance name** and reference it +explicitly. Because names are arbitrary, this covers every multi-sandbox case +without any framework change: + +- **Different providers at once** (e.g. one agent on OpenSandbox, a grader on + another provider): + + ```yaml + sandbox_rollout: + opensandbox: { ... } + sandbox_grading: + docker: { ... } + ``` + +- **Two configs of the same provider type** (e.g. two OpenSandbox endpoints — note + the same inner `opensandbox` key, distinct outer instance names): + + ```yaml + opensandbox_foo: + opensandbox: { connection: { domain: foo... } } + opensandbox_baz: + opensandbox: { connection: { domain: baz... } } + ``` + +Each agent then references the instance it needs (`sandbox_provider: +opensandbox_foo`). Whether a single agent consumes one or several sandboxes is +part of that agent's config contract; `mini_swe_agent_2` uses exactly one sandbox +per task. + +> Reminder: do not give two included config files the same instance name unless you +> intend swap-by-replace — same name merges last-wins. + ### Model Parameters `MiniSWEAgent.run()` maps supported Responses API fields into mini-swe-agent @@ -212,11 +309,16 @@ policy_api_key: dummy-key policy_model_name: ``` -Start the mini-swe-agent 2 server with the OpenSandbox provider and a policy -model server. The values below show a representative SWE-bench eval setup: +Start the mini-swe-agent 2 server by composing three config paths: the +provider-neutral agent config, a sandbox provider config, and a policy model +server config. To swap providers, change only the sandbox provider path. The +values below show a representative SWE-bench eval setup with OpenSandbox: ```bash -CONFIG_PATHS="responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_opensandbox.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml" +AGENT="responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml" +SANDBOX="nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml" +MODEL="responses_api_models/vllm_model/configs/vllm_model.yaml" +CONFIG_PATHS="$AGENT,$SANDBOX,$MODEL" ng_run "+config_paths=[$CONFIG_PATHS]" \ +mini_swe_agent_2.responses_api_agents.mini_swe_agent_2.concurrency=64 \ @@ -276,8 +378,9 @@ outer per-sample guard there. `MiniSWESandboxEnvironment` adapts mini-swe-agent's synchronous environment contract to `nemo_gym.sandbox.Sandbox`. -When `env` is `sandbox`, Gym injects this environment config before calling -mini-swe-agent: +When `env` is `sandbox`, the agent resolves `sandbox_provider` (name reference or +inline mapping) to a single-key provider config and Gym injects this environment +config before calling mini-swe-agent: ```yaml environment: diff --git a/responses_api_agents/mini_swe_agent_2/app.py b/responses_api_agents/mini_swe_agent_2/app.py index febe8fe696..a175aeacc2 100644 --- a/responses_api_agents/mini_swe_agent_2/app.py +++ b/responses_api_agents/mini_swe_agent_2/app.py @@ -47,6 +47,7 @@ NeMoGymResponseCreateParamsNonStreaming, ) from nemo_gym.reward_profile import compute_pass_majority_metrics, highest_k_metrics +from nemo_gym.sandbox import resolve_provider_config from nemo_gym.server_utils import ( ServerClient, get_first_server_config_dict, @@ -61,7 +62,9 @@ class MiniSWEAgentConfig(BaseResponsesAPIAgentConfig): model_server: ModelServerRef env: Literal["sandbox"] concurrency: int - sandbox_provider: Optional[dict[str, Any]] = None + # A sandbox name resolved from a separate provider config (e.g. "sandbox"), + # or an inline single-key provider mapping ({provider_name: {...}}). + sandbox_provider: Optional[str | dict[str, Any]] = None sandbox_spec: Optional[dict[str, Any]] = None sandbox_environment_kwargs: Optional[dict[str, Any]] = None run_golden: bool = False @@ -747,8 +750,9 @@ async def run(self, body: MiniSWEAgentRunRequest) -> MiniSWEAgentVerifyResponse: should_write_config = bool(model_kwargs) if self.config.sandbox_provider is None: raise ValueError("mini_swe_agent_2 requires sandbox_provider") + resolved_sandbox_provider = resolve_provider_config(self.config.sandbox_provider, global_config_dict) config.setdefault("environment", {}).update(self.config.sandbox_environment_kwargs or {}) - config["environment"]["provider"] = _sandbox_provider_for_config_dump(self.config.sandbox_provider) + config["environment"]["provider"] = _sandbox_provider_for_config_dump(resolved_sandbox_provider) config["environment"]["spec"] = _sandbox_spec_for_instance( self.config.sandbox_spec, resource_profiles=self.config.sandbox_resource_profiles, @@ -791,7 +795,7 @@ async def run(self, body: MiniSWEAgentRunRequest) -> MiniSWEAgentVerifyResponse: step_limit=step_limit, ) runner = runner_ray_remote - runtime_env = _sandbox_runtime_env(self.config.sandbox_provider) + runtime_env = _sandbox_runtime_env(resolved_sandbox_provider) if runtime_env.get("env_vars"): runner = runner.options(runtime_env=runtime_env) future = runner.remote(run_mini_swe_with_sandbox, params) diff --git a/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_opensandbox.yaml b/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml similarity index 52% rename from responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_opensandbox.yaml rename to responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml index c6fffd12ca..9f225012aa 100644 --- a/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_opensandbox.yaml +++ b/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml @@ -1,41 +1,26 @@ +# Provider-neutral mini-swe-agent 2 config. +# +# `sandbox_provider` names the sandbox to use; its config lives in a separate +# provider file, so swapping providers is swapping that path in `+config_paths`: +# +# AGENT=responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml +# MODEL=responses_api_models/vllm_model/configs/vllm_model.yaml +# ng_run "+config_paths=[$AGENT, nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml, $MODEL]" mini_swe_agent_2: responses_api_agents: mini_swe_agent_2: entrypoint: app.py domain: coding - description: Software engineering tasks driven by mini-swe-agent harness on OpenSandbox. + description: Software engineering tasks driven by mini-swe-agent harness on a Gym sandbox. value: Improve agentic software engineering capabilities. model_server: type: responses_api_models name: policy_model concurrency: 64 env: sandbox - sandbox_provider: - opensandbox: - connection: - domain: opensandbox-server.opensandbox-system.svc.cluster.local - api_key: ${oc.env:OPENSANDBOX_API_KEY} - protocol: http - request_timeout_s: 300 - use_server_proxy: true - create: - request_timeout_s: 1200 - timeout_s: 1200 - skip_health_check: true - retries: 10 - retry_delay_s: 5.0 - retry_max_delay_s: 90.0 - probe: - timeout_s: 60 - deadline_s: 180 - stable_count: 2 - stable_delay_s: 1.0 - operations: - retries: 5 - retry_delay_s: 1.0 - retry_max_delay_s: 45.0 - command_retries: 0 - close_timeout_s: 30 + # Name of the sandbox to use; include a provider config that defines a + # `sandbox` block (e.g. nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml). + sandbox_provider: sandbox sandbox_spec: ttl_s: 18000 ready_timeout_s: 1200 @@ -50,7 +35,6 @@ mini_swe_agent_2: metadata: benchmark: swebench-verified harness: mini-swe-agent - sandbox-api: opensandbox-sdk sandbox_environment_kwargs: cwd: /testbed conda_env: testbed diff --git a/responses_api_agents/mini_swe_agent_2/tests/test_app.py b/responses_api_agents/mini_swe_agent_2/tests/test_app.py index 57ce79ad09..f2404b178e 100644 --- a/responses_api_agents/mini_swe_agent_2/tests/test_app.py +++ b/responses_api_agents/mini_swe_agent_2/tests/test_app.py @@ -766,6 +766,55 @@ async def test_run_writes_generation_params_to_config( "chat_template_kwargs": {"enable_thinking": True}, } + @patch("responses_api_agents.mini_swe_agent_2.app.ServerClient.load_from_global_config") + @patch("responses_api_agents.mini_swe_agent_2.app.get_first_server_config_dict") + @patch("responses_api_agents.mini_swe_agent_2.app.get_config_path") + @patch("responses_api_agents.mini_swe_agent_2.app.runner_ray_remote") + @patch("asyncio.to_thread") + async def test_run_resolves_named_sandbox_provider_reference( + self, + mock_to_thread, + mock_runner_ray_remote, + mock_get_config_path, + mock_get_first_server_config_dict, + mock_load_from_global_config, + tmp_path, + monkeypatch, + ) -> None: + monkeypatch.chdir(tmp_path) + config = create_test_config() + config.sandbox_provider = "sandbox" + mock_server_client = MagicMock(spec=ServerClient) + server = MiniSWEAgent(config=config, server_client=mock_server_client) + + mock_server_client_instance = MagicMock() + mock_server_client_instance.global_config_dict = { + "policy_model_name": "test_model", + "sandbox": { + "opensandbox": { + "connection": { + "domain": "sandbox.example", + "api_key": "fixture-value", # pragma: allowlist secret + } + } + }, + } + mock_load_from_global_config.return_value = mock_server_client_instance + mock_get_first_server_config_dict.return_value = {"host": "0.0.0.0", "port": 8080} + setup_config_path_mock(mock_get_config_path) + setup_run_mini_swe_mock(mock_to_thread, mock_runner_ray_remote) + + await server.run(create_run_request()) + + runtime_env = mock_runner_ray_remote.options.call_args.kwargs["runtime_env"] + assert runtime_env["env_vars"] == {OPENSANDBOX_API_KEY_ENV: "fixture-value"} # pragma: allowlist secret + call_args = mock_runner_ray_remote.options.return_value.remote.call_args + params = call_args.args[1] + generated_config = yaml.safe_load(Path(params["config"]).read_text()) + provider = generated_config["environment"]["provider"]["opensandbox"] + assert provider["connection"]["domain"] == "sandbox.example" + assert "api_key" not in provider["connection"] + @patch("responses_api_agents.mini_swe_agent_2.app.ServerClient.load_from_global_config") @patch("responses_api_agents.mini_swe_agent_2.app.get_first_server_config_dict") @patch("responses_api_agents.mini_swe_agent_2.app.get_config_path") diff --git a/tests/unit_tests/test_sandbox.py b/tests/unit_tests/test_sandbox.py index c26f660b3e..fd155608a0 100644 --- a/tests/unit_tests/test_sandbox.py +++ b/tests/unit_tests/test_sandbox.py @@ -36,6 +36,7 @@ get_provider_class, list_providers, register_provider, + resolve_provider_config, ) from nemo_gym.sandbox.api import _AsyncLoopRunner from nemo_gym.sandbox.utils import rewrite_image @@ -407,6 +408,52 @@ def __init__(self) -> None: Sandbox({failing_provider_name: {}}) +def test_resolve_provider_config_named_reference() -> None: + global_config = { + "policy_model_name": "test_model", + "sandbox_main": {"opensandbox": {"connection": {"domain": "sandbox.example"}}}, + } + + resolved = resolve_provider_config("sandbox_main", global_config) + assert resolved == {"opensandbox": {"connection": {"domain": "sandbox.example"}}} + + # An OmegaConf DictConfig block resolves to a plain dict. + from omegaconf import OmegaConf + + omega_config = OmegaConf.create(global_config) + resolved_from_omega = resolve_provider_config("sandbox_main", omega_config) + assert resolved_from_omega == {"opensandbox": {"connection": {"domain": "sandbox.example"}}} + assert isinstance(resolved_from_omega["opensandbox"], dict) + + +def test_resolve_provider_config_inline_mapping() -> None: + inline = {"opensandbox": {"connection": {}}} + assert resolve_provider_config(inline) == inline + # The result is a fresh dict, not the same object. + assert resolve_provider_config(inline) is not inline + + +def test_resolve_provider_config_errors() -> None: + with pytest.raises(ValueError, match="non-empty string"): + resolve_provider_config("", {}) + + with pytest.raises(ValueError, match="is not defined in the merged config"): + resolve_provider_config("missing", {"sandbox_main": {"opensandbox": {}}}) + + # Error lists available single-key sandbox blocks as candidates. + with pytest.raises(ValueError, match="'sandbox_main'"): + resolve_provider_config("missing", {"sandbox_main": {"opensandbox": {}}}) + + with pytest.raises(TypeError, match="must be a name reference"): + resolve_provider_config(123) # type: ignore[arg-type] + + with pytest.raises(ValueError, match="single-key mapping"): + resolve_provider_config({"opensandbox": {}, "extra": {}}) + + with pytest.raises(ValueError, match="single-key mapping"): + resolve_provider_config("sandbox_main", {"sandbox_main": {}}) + + def test_async_sandbox_transfer_fallback_and_unknown_status(tmp_path: Path) -> None: asyncio.run(_assert_async_sandbox_transfer_fallback_and_unknown_status(tmp_path)) From ce625665edcadcdafb19b7682c3bd3c3b040a082 Mon Sep 17 00:00:00 2001 From: Ananth Subramaniam Date: Wed, 24 Jun 2026 01:56:05 -0700 Subject: [PATCH 2/6] feat(sandbox): let provider configs contribute default sandbox metadata A sandbox block may carry an optional `default_metadata` key whose entries are merged into each sandbox's spec metadata (SandboxSpec.metadata), with the agent's own sandbox_spec.metadata taking precedence. This keeps provider-identifying tags with the provider config instead of the provider-neutral agent config. - Add resolve_provider_metadata and exclude reserved keys (default_metadata) from resolve_provider_config. - mini_swe_agent_2 merges provider default_metadata into the sandbox spec metadata. - Restore sandbox-api: opensandbox-sdk via the opensandbox provider config. Refs #1377 Signed-off-by: Ananth Subramaniam --- nemo_gym/sandbox/__init__.py | 3 +- nemo_gym/sandbox/config.py | 113 +++++++++++++----- .../opensandbox/configs/opensandbox.yaml | 5 + .../mini_swe_agent_2/README.md | 8 ++ responses_api_agents/mini_swe_agent_2/app.py | 9 +- .../mini_swe_agent_2/tests/test_app.py | 5 +- tests/unit_tests/test_sandbox.py | 26 +++- 7 files changed, 132 insertions(+), 37 deletions(-) diff --git a/nemo_gym/sandbox/__init__.py b/nemo_gym/sandbox/__init__.py index 25e613ed43..e85f5728db 100644 --- a/nemo_gym/sandbox/__init__.py +++ b/nemo_gym/sandbox/__init__.py @@ -15,7 +15,7 @@ """Public sandbox API for NeMo Gym.""" from nemo_gym.sandbox.api import AsyncSandbox, Sandbox -from nemo_gym.sandbox.config import resolve_provider_config +from nemo_gym.sandbox.config import resolve_provider_config, resolve_provider_metadata from nemo_gym.sandbox.providers import ( ExecResult, SandboxCreateError, @@ -51,5 +51,6 @@ "list_providers", "register_provider", "resolve_provider_config", + "resolve_provider_metadata", "rewrite_image", ] diff --git a/nemo_gym/sandbox/config.py b/nemo_gym/sandbox/config.py index 6aa0c43580..c0da2d73bd 100644 --- a/nemo_gym/sandbox/config.py +++ b/nemo_gym/sandbox/config.py @@ -28,12 +28,26 @@ An inline single-key mapping (``{provider_name: {...}}``) is also accepted for keeping everything in one file. + +A block may also carry a reserved ``default_metadata`` key. Its entries are merged +into the sandbox's ``SandboxSpec.metadata`` as defaults (the agent's own +``sandbox_spec.metadata`` overrides them), so provider-identifying tags live with +the provider rather than the agent config:: + + sandbox: + opensandbox: { connection: { ... } } + default_metadata: { sandbox-api: opensandbox-sdk } """ from collections.abc import Mapping from typing import Any +# Reserved keys inside a sandbox block that are not the provider config. +SANDBOX_BLOCK_DEFAULT_METADATA_KEY = "default_metadata" +SANDBOX_BLOCK_RESERVED_KEYS = frozenset({SANDBOX_BLOCK_DEFAULT_METADATA_KEY}) + + def _to_plain_dict(value: Any) -> Any: """Return a plain ``dict`` for mappings, including OmegaConf ``DictConfig``.""" try: @@ -49,6 +63,11 @@ def _to_plain_dict(value: Any) -> Any: return value +def _provider_keys(block: Mapping[str, Any]) -> list[str]: + """Return the provider keys in a block (everything but reserved keys).""" + return [key for key in block if key not in SANDBOX_BLOCK_RESERVED_KEYS] + + def _candidate_sandbox_names(named_configs: Mapping[str, Any] | None) -> list[str]: """List top-level config keys that look like named sandbox provider blocks.""" if not named_configs: @@ -56,11 +75,38 @@ def _candidate_sandbox_names(named_configs: Mapping[str, Any] | None) -> list[st candidates: list[str] = [] for key, value in named_configs.items(): plain = _to_plain_dict(value) - if isinstance(plain, Mapping) and len(plain) == 1: + if isinstance(plain, Mapping) and len(_provider_keys(plain)) == 1: candidates.append(str(key)) return sorted(candidates) +def _resolve_block( + sandbox_provider: str | Mapping[str, Any], + named_configs: Mapping[str, Any] | None, +) -> tuple[dict[str, Any], str]: + """Resolve a ``sandbox_provider`` reference or inline mapping to a plain block.""" + if isinstance(sandbox_provider, str): + name = sandbox_provider + if not name: + raise ValueError("Sandbox provider reference must be a non-empty string") + block = named_configs.get(name) if named_configs is not None else None + if block is None: + available = ", ".join(repr(n) for n in _candidate_sandbox_names(named_configs)) or "(none)" + raise ValueError( + f"Sandbox provider reference {name!r} is not defined in the merged config. " + f"Define a top-level '{name}:' block (e.g. via " + f"nemo_gym/sandbox/providers//configs/.yaml) and include it in " + f"your config_paths. Available sandbox configs: {available}" + ) + return _to_plain_dict(block), f"reference {name!r}" + if isinstance(sandbox_provider, Mapping): + return _to_plain_dict(sandbox_provider), "inline sandbox_provider config" + raise TypeError( + "sandbox_provider must be a name reference (str) or a single-key provider mapping, " + f"got {type(sandbox_provider).__name__}" + ) + + def resolve_provider_config( sandbox_provider: str | Mapping[str, Any], named_configs: Mapping[str, Any] | None = None, @@ -77,41 +123,46 @@ def resolve_provider_config( Returns: A plain ``{provider_name: provider_kwargs}`` dict suitable for - :func:`nemo_gym.sandbox.create_provider`. + :func:`nemo_gym.sandbox.create_provider`. Reserved keys such as + ``default_metadata`` are excluded; read them with + :func:`resolve_provider_metadata`. Raises: TypeError: If ``sandbox_provider`` is neither a string nor a mapping. - ValueError: If a named reference cannot be found, or if the resolved block - is not a single-key provider mapping. + ValueError: If a named reference cannot be found, or if the block does not + hold exactly one provider key. """ - if isinstance(sandbox_provider, str): - name = sandbox_provider - if not name: - raise ValueError("Sandbox provider reference must be a non-empty string") - block = named_configs.get(name) if named_configs is not None else None - if block is None: - available = ", ".join(repr(n) for n in _candidate_sandbox_names(named_configs)) or "(none)" - raise ValueError( - f"Sandbox provider reference {name!r} is not defined in the merged config. " - f"Define a top-level '{name}:' block (e.g. via " - f"nemo_gym/sandbox/providers//configs/.yaml) and include it in " - f"your config_paths. Available sandbox configs: {available}" - ) - block = _to_plain_dict(block) - source = f"reference {name!r}" - elif isinstance(sandbox_provider, Mapping): - block = _to_plain_dict(sandbox_provider) - source = "inline sandbox_provider config" - else: - raise TypeError( - "sandbox_provider must be a name reference (str) or a single-key provider mapping, " - f"got {type(sandbox_provider).__name__}" - ) + block, source = _resolve_block(sandbox_provider, named_configs) + if not isinstance(block, Mapping): + raise ValueError(f"Sandbox provider config from {source} must be a mapping, got: {block!r}") - if not isinstance(block, Mapping) or len(block) != 1: + provider_keys = _provider_keys(block) + if len(provider_keys) != 1: raise ValueError( - f"Sandbox provider config from {source} must be a single-key mapping " - f"{{provider_name: config}}, got: {block!r}" + f"Sandbox provider config from {source} must have exactly one provider key " + f"{{provider_name: config}}, got keys: {provider_keys!r}" ) - return dict(block) + return {provider_keys[0]: block[provider_keys[0]]} + + +def resolve_provider_metadata( + sandbox_provider: str | Mapping[str, Any], + named_configs: Mapping[str, Any] | None = None, +) -> dict[str, Any]: + """Return a sandbox block's ``default_metadata``. + + These are provider-contributed defaults to merge into ``SandboxSpec.metadata``. + Returns an empty dict when the block has no ``default_metadata`` key. See + :func:`resolve_provider_config` for argument semantics. + """ + block, source = _resolve_block(sandbox_provider, named_configs) + if not isinstance(block, Mapping): + raise ValueError(f"Sandbox provider config from {source} must be a mapping, got: {block!r}") + + metadata = block.get(SANDBOX_BLOCK_DEFAULT_METADATA_KEY) or {} + if not isinstance(metadata, Mapping): + raise ValueError( + f"Sandbox '{SANDBOX_BLOCK_DEFAULT_METADATA_KEY}' from {source} must be a mapping, got: {metadata!r}" + ) + return dict(metadata) diff --git a/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml b/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml index 8345100092..708ceac31c 100644 --- a/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml +++ b/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml @@ -13,7 +13,12 @@ # # To run multiple sandboxes at once, give each block a distinct instance name # (e.g. `opensandbox_foo`, `opensandbox_baz`) and reference each by name. +# +# `default_metadata` (optional) is merged into every sandbox's spec metadata +# (SandboxSpec.metadata); an agent's own sandbox_spec.metadata overrides it. sandbox: + default_metadata: + sandbox-api: opensandbox-sdk opensandbox: connection: domain: ${oc.env:OPENSANDBOX_DOMAIN,opensandbox-server.opensandbox-system.svc.cluster.local} diff --git a/responses_api_agents/mini_swe_agent_2/README.md b/responses_api_agents/mini_swe_agent_2/README.md index 51a5e5346a..9a81bf1950 100644 --- a/responses_api_agents/mini_swe_agent_2/README.md +++ b/responses_api_agents/mini_swe_agent_2/README.md @@ -195,6 +195,8 @@ Path - `nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml` ```yaml sandbox: # name referenced by the agent's sandbox_provider + default_metadata: # optional: merged into sandbox spec metadata (see below) + sandbox-api: opensandbox-sdk opensandbox: # provider registry key -> provider class connection: domain: ${oc.env:OPENSANDBOX_DOMAIN,opensandbox-server.opensandbox-system.svc.cluster.local} @@ -227,6 +229,12 @@ To use a different provider, add a config file under `sandbox` block (the name the agent references) with that provider's registry key, then point `+config_paths` at it instead — no agent edit required. +An optional `default_metadata` key holds provider-contributed defaults that are +merged into each sandbox's spec metadata (`SandboxSpec.metadata`); the agent's own +`sandbox_spec.metadata` overrides them on conflict. This keeps provider-identifying +tags (e.g. `sandbox-api: opensandbox-sdk`) with the provider rather than in the +agent config. + Optional `sandbox_resource_profiles` can be configured as a list of resource maps. When present, the agent hashes `instance_id` and deterministically merges one profile into `sandbox_spec.resources`. This is useful for spreading diff --git a/responses_api_agents/mini_swe_agent_2/app.py b/responses_api_agents/mini_swe_agent_2/app.py index a175aeacc2..7cea632365 100644 --- a/responses_api_agents/mini_swe_agent_2/app.py +++ b/responses_api_agents/mini_swe_agent_2/app.py @@ -47,7 +47,7 @@ NeMoGymResponseCreateParamsNonStreaming, ) from nemo_gym.reward_profile import compute_pass_majority_metrics, highest_k_metrics -from nemo_gym.sandbox import resolve_provider_config +from nemo_gym.sandbox import resolve_provider_config, resolve_provider_metadata from nemo_gym.server_utils import ( ServerClient, get_first_server_config_dict, @@ -751,13 +751,18 @@ async def run(self, body: MiniSWEAgentRunRequest) -> MiniSWEAgentVerifyResponse: if self.config.sandbox_provider is None: raise ValueError("mini_swe_agent_2 requires sandbox_provider") resolved_sandbox_provider = resolve_provider_config(self.config.sandbox_provider, global_config_dict) + provider_default_metadata = resolve_provider_metadata(self.config.sandbox_provider, global_config_dict) config.setdefault("environment", {}).update(self.config.sandbox_environment_kwargs or {}) config["environment"]["provider"] = _sandbox_provider_for_config_dump(resolved_sandbox_provider) - config["environment"]["spec"] = _sandbox_spec_for_instance( + instance_spec = _sandbox_spec_for_instance( self.config.sandbox_spec, resource_profiles=self.config.sandbox_resource_profiles, instance_id=instance_id, ) + if provider_default_metadata: + # Provider defaults first; the agent's own spec metadata wins on conflict. + instance_spec["metadata"] = {**provider_default_metadata, **(instance_spec.get("metadata") or {})} + config["environment"]["spec"] = instance_spec should_write_config = True if should_write_config: diff --git a/responses_api_agents/mini_swe_agent_2/tests/test_app.py b/responses_api_agents/mini_swe_agent_2/tests/test_app.py index f2404b178e..00bed9871c 100644 --- a/responses_api_agents/mini_swe_agent_2/tests/test_app.py +++ b/responses_api_agents/mini_swe_agent_2/tests/test_app.py @@ -791,12 +791,13 @@ async def test_run_resolves_named_sandbox_provider_reference( mock_server_client_instance.global_config_dict = { "policy_model_name": "test_model", "sandbox": { + "default_metadata": {"sandbox-api": "opensandbox-sdk"}, "opensandbox": { "connection": { "domain": "sandbox.example", "api_key": "fixture-value", # pragma: allowlist secret } - } + }, }, } mock_load_from_global_config.return_value = mock_server_client_instance @@ -814,6 +815,8 @@ async def test_run_resolves_named_sandbox_provider_reference( provider = generated_config["environment"]["provider"]["opensandbox"] assert provider["connection"]["domain"] == "sandbox.example" assert "api_key" not in provider["connection"] + # Provider default_metadata flows into the sandbox spec metadata. + assert generated_config["environment"]["spec"]["metadata"]["sandbox-api"] == "opensandbox-sdk" @patch("responses_api_agents.mini_swe_agent_2.app.ServerClient.load_from_global_config") @patch("responses_api_agents.mini_swe_agent_2.app.get_first_server_config_dict") diff --git a/tests/unit_tests/test_sandbox.py b/tests/unit_tests/test_sandbox.py index fd155608a0..d4e6d7bbcd 100644 --- a/tests/unit_tests/test_sandbox.py +++ b/tests/unit_tests/test_sandbox.py @@ -37,6 +37,7 @@ list_providers, register_provider, resolve_provider_config, + resolve_provider_metadata, ) from nemo_gym.sandbox.api import _AsyncLoopRunner from nemo_gym.sandbox.utils import rewrite_image @@ -447,13 +448,34 @@ def test_resolve_provider_config_errors() -> None: with pytest.raises(TypeError, match="must be a name reference"): resolve_provider_config(123) # type: ignore[arg-type] - with pytest.raises(ValueError, match="single-key mapping"): + with pytest.raises(ValueError, match="exactly one provider key"): resolve_provider_config({"opensandbox": {}, "extra": {}}) - with pytest.raises(ValueError, match="single-key mapping"): + with pytest.raises(ValueError, match="exactly one provider key"): resolve_provider_config("sandbox_main", {"sandbox_main": {}}) +def test_resolve_provider_metadata() -> None: + block = { + "opensandbox": {"connection": {"domain": "sandbox.example"}}, + "default_metadata": {"sandbox-api": "opensandbox-sdk"}, + } + + # default_metadata is excluded from the provider config and read separately. + assert resolve_provider_config(block) == {"opensandbox": {"connection": {"domain": "sandbox.example"}}} + assert resolve_provider_metadata(block) == {"sandbox-api": "opensandbox-sdk"} + + # A named reference works the same way. + global_config = {"sandbox": block} + assert resolve_provider_metadata("sandbox", global_config) == {"sandbox-api": "opensandbox-sdk"} + + # No default_metadata key -> empty dict. + assert resolve_provider_metadata({"opensandbox": {}}) == {} + + with pytest.raises(ValueError, match="must be a mapping"): + resolve_provider_metadata({"opensandbox": {}, "default_metadata": "not-a-mapping"}) + + def test_async_sandbox_transfer_fallback_and_unknown_status(tmp_path: Path) -> None: asyncio.run(_assert_async_sandbox_transfer_fallback_and_unknown_status(tmp_path)) From 0b1c4d54253f2f9e402060bcae1b106786279382 Mon Sep 17 00:00:00 2001 From: Ananth Subramaniam Date: Wed, 24 Jun 2026 02:36:53 -0700 Subject: [PATCH 3/6] feat(sandbox): discover sandbox providers via entry points Add a `nemo_gym.sandbox_providers` entry point group so a separate package can publish a sandbox provider that becomes available on install/import, without editing the registry. Lookup precedence is explicit registration > built-in loaders > entry points; discovery is cached. Refs #1377 Signed-off-by: Ananth Subramaniam --- nemo_gym/sandbox/providers/registry.py | 45 ++++++++++++++----- .../mini_swe_agent_2/README.md | 8 ++++ tests/unit_tests/test_sandbox.py | 27 +++++++++++ 3 files changed, 69 insertions(+), 11 deletions(-) diff --git a/nemo_gym/sandbox/providers/registry.py b/nemo_gym/sandbox/providers/registry.py index 8c4e39e577..a0e6449663 100644 --- a/nemo_gym/sandbox/providers/registry.py +++ b/nemo_gym/sandbox/providers/registry.py @@ -12,9 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Provider registration utilities.""" +"""Provider registration utilities. + +Providers can be made available three ways, in lookup precedence order: + +1. ``register_provider(name, cls)`` — explicit in-process registration. +2. Built-in loaders shipped with NeMo Gym (e.g. ``opensandbox``). +3. Python entry points in the ``nemo_gym.sandbox_providers`` group, so a separate + package can publish a provider that becomes available on install. Declare one + in that package's ``pyproject.toml``:: + + [project.entry-points."nemo_gym.sandbox_providers"] + my_provider = "my_pkg.provider:MyProvider" +""" from collections.abc import Callable, Mapping +from importlib.metadata import entry_points from typing import Any, TypeAlias from nemo_gym.sandbox.providers.base import SandboxProvider @@ -23,8 +36,19 @@ ProviderClass: TypeAlias = type[SandboxProvider] ProviderLoader: TypeAlias = Callable[[], ProviderClass] +ENTRY_POINT_GROUP = "nemo_gym.sandbox_providers" + _PROVIDER_REGISTRY: dict[str, ProviderClass] = {} _BUILTIN_PROVIDER_LOADERS: dict[str, ProviderLoader] = {} +_ENTRY_POINT_LOADERS: dict[str, ProviderLoader] | None = None + + +def _entry_point_loaders() -> dict[str, ProviderLoader]: + """Discover provider loaders from installed entry points (cached).""" + global _ENTRY_POINT_LOADERS + if _ENTRY_POINT_LOADERS is None: + _ENTRY_POINT_LOADERS = {ep.name: ep.load for ep in entry_points(group=ENTRY_POINT_GROUP)} + return _ENTRY_POINT_LOADERS def register_provider(name: str, provider_class: ProviderClass, *, override: bool = False) -> None: @@ -37,15 +61,14 @@ def register_provider(name: str, provider_class: ProviderClass, *, override: boo def get_provider_class(name: str) -> ProviderClass: - """Return a registered provider class.""" - try: + """Return a provider class by name (explicit > built-in > entry point).""" + if name in _PROVIDER_REGISTRY: return _PROVIDER_REGISTRY[name] - except KeyError as e: - loader = _BUILTIN_PROVIDER_LOADERS.get(name) - if loader is not None: - return loader() - available = ", ".join(list_providers()) or "" - raise ValueError(f"Unknown sandbox provider {name!r}. Available providers: {available}") from e + loader = _BUILTIN_PROVIDER_LOADERS.get(name) or _entry_point_loaders().get(name) + if loader is not None: + return loader() + available = ", ".join(list_providers()) or "" + raise ValueError(f"Unknown sandbox provider {name!r}. Available providers: {available}") def create_provider(config: Mapping[str, Any]) -> SandboxProvider: @@ -65,8 +88,8 @@ def create_provider(config: Mapping[str, Any]) -> SandboxProvider: def list_providers() -> list[str]: - """List registered provider names.""" - return sorted({*_PROVIDER_REGISTRY, *_BUILTIN_PROVIDER_LOADERS}) + """List available provider names from all sources.""" + return sorted({*_PROVIDER_REGISTRY, *_BUILTIN_PROVIDER_LOADERS, *_entry_point_loaders()}) def _load_opensandbox_provider() -> ProviderClass: diff --git a/responses_api_agents/mini_swe_agent_2/README.md b/responses_api_agents/mini_swe_agent_2/README.md index 9a81bf1950..546c67efef 100644 --- a/responses_api_agents/mini_swe_agent_2/README.md +++ b/responses_api_agents/mini_swe_agent_2/README.md @@ -235,6 +235,14 @@ merged into each sandbox's spec metadata (`SandboxSpec.metadata`); the agent's o tags (e.g. `sandbox-api: opensandbox-sdk`) with the provider rather than in the agent config. +To ship a custom provider class from a separate package, register it under the +`nemo_gym.sandbox_providers` entry point group so it is available on install: + +```toml +[project.entry-points."nemo_gym.sandbox_providers"] +my_provider = "my_pkg.provider:MyProvider" +``` + Optional `sandbox_resource_profiles` can be configured as a list of resource maps. When present, the agent hashes `instance_id` and deterministically merges one profile into `sandbox_spec.resources`. This is useful for spreading diff --git a/tests/unit_tests/test_sandbox.py b/tests/unit_tests/test_sandbox.py index d4e6d7bbcd..8193a54e37 100644 --- a/tests/unit_tests/test_sandbox.py +++ b/tests/unit_tests/test_sandbox.py @@ -17,6 +17,7 @@ import threading from datetime import timedelta from pathlib import Path +from types import SimpleNamespace from typing import Any from uuid import uuid4 @@ -385,6 +386,32 @@ def test_provider_registry_validation_and_listing(monkeypatch: pytest.MonkeyPatc assert builtin_name in list_providers() +def test_provider_entry_point_discovery(monkeypatch: pytest.MonkeyPatch) -> None: + ep_name = f"ep-{uuid4().hex}" + + class EntryPointProvider(FakeSandboxProvider): + pass + + def fake_entry_points(*, group: str) -> list[SimpleNamespace]: + assert group == provider_registry.ENTRY_POINT_GROUP + return [SimpleNamespace(name=ep_name, load=lambda: EntryPointProvider)] + + monkeypatch.setattr(provider_registry, "entry_points", fake_entry_points) + monkeypatch.setattr(provider_registry, "_ENTRY_POINT_LOADERS", None) + + assert ep_name in list_providers() + assert get_provider_class(ep_name) is EntryPointProvider + + # Built-in providers take precedence over an entry point with the same name. + monkeypatch.setattr(provider_registry, "_ENTRY_POINT_LOADERS", None) + monkeypatch.setattr( + provider_registry, + "entry_points", + lambda *, group: [SimpleNamespace(name="opensandbox", load=lambda: EntryPointProvider)], + ) + assert get_provider_class("opensandbox").__name__ == "OpenSandboxProvider" + + def test_create_provider_validation_and_constructor_cleanup() -> None: provider_name = f"fake-{uuid4().hex}" register_provider(provider_name, FakeSandboxProvider) From 89aa9654919e5a5b88261bb28ff5b6fbe823431e Mon Sep 17 00:00:00 2001 From: Ananth Subramaniam Date: Wed, 24 Jun 2026 02:51:50 -0700 Subject: [PATCH 4/6] feat(sandbox): detect sandbox provider entry-point name collisions Two installed distributions publishing the same provider entry-point name now raise a clear error naming both packages, instead of silently picking one nondeterministically. An entry point shadowed by a higher-precedence built-in or registered provider is logged as a warning and ignored. Refs #1377 Signed-off-by: Ananth Subramaniam --- nemo_gym/sandbox/providers/registry.py | 39 ++++++++++++++++++++++++-- tests/unit_tests/test_sandbox.py | 33 ++++++++++++++++++++-- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/nemo_gym/sandbox/providers/registry.py b/nemo_gym/sandbox/providers/registry.py index a0e6449663..12347e2614 100644 --- a/nemo_gym/sandbox/providers/registry.py +++ b/nemo_gym/sandbox/providers/registry.py @@ -24,15 +24,22 @@ [project.entry-points."nemo_gym.sandbox_providers"] my_provider = "my_pkg.provider:MyProvider" + +On name collisions: two entry points sharing a name raise (selection would be +nondeterministic); an entry point shadowed by a higher-precedence built-in or +registered provider is warned and ignored. """ +import logging from collections.abc import Callable, Mapping -from importlib.metadata import entry_points +from importlib.metadata import EntryPoint, entry_points from typing import Any, TypeAlias from nemo_gym.sandbox.providers.base import SandboxProvider +LOGGER = logging.getLogger(__name__) + ProviderClass: TypeAlias = type[SandboxProvider] ProviderLoader: TypeAlias = Callable[[], ProviderClass] @@ -43,11 +50,37 @@ _ENTRY_POINT_LOADERS: dict[str, ProviderLoader] | None = None +def _entry_point_dist_name(ep: EntryPoint) -> str: + dist = getattr(ep, "dist", None) + return getattr(dist, "name", None) or "" + + def _entry_point_loaders() -> dict[str, ProviderLoader]: - """Discover provider loaders from installed entry points (cached).""" + """Discover provider loaders from installed entry points (cached). + + Raises if two distributions publish the same provider name, since lookup + would otherwise pick one nondeterministically. Warns when an entry point is + shadowed by a built-in or explicitly registered provider of the same name. + """ global _ENTRY_POINT_LOADERS if _ENTRY_POINT_LOADERS is None: - _ENTRY_POINT_LOADERS = {ep.name: ep.load for ep in entry_points(group=ENTRY_POINT_GROUP)} + loaders: dict[str, ProviderLoader] = {} + dist_by_name: dict[str, str] = {} + for ep in entry_points(group=ENTRY_POINT_GROUP): + dist_name = _entry_point_dist_name(ep) + if ep.name in loaders: + raise ValueError( + f"Duplicate sandbox provider entry point {ep.name!r} published by " + f"{dist_by_name[ep.name]!r} and {dist_name!r}. Rename one of them." + ) + if ep.name in _BUILTIN_PROVIDER_LOADERS or ep.name in _PROVIDER_REGISTRY: + LOGGER.warning( + f"Sandbox provider entry point {ep.name!r} from {dist_name!r} is shadowed by a " + f"built-in or registered provider of the same name and will not be used." + ) + loaders[ep.name] = ep.load + dist_by_name[ep.name] = dist_name + _ENTRY_POINT_LOADERS = loaders return _ENTRY_POINT_LOADERS diff --git a/tests/unit_tests/test_sandbox.py b/tests/unit_tests/test_sandbox.py index 8193a54e37..46f0e457d2 100644 --- a/tests/unit_tests/test_sandbox.py +++ b/tests/unit_tests/test_sandbox.py @@ -386,6 +386,10 @@ def test_provider_registry_validation_and_listing(monkeypatch: pytest.MonkeyPatc assert builtin_name in list_providers() +def _fake_entry_point(name: str, provider: type, dist_name: str) -> SimpleNamespace: + return SimpleNamespace(name=name, load=lambda: provider, dist=SimpleNamespace(name=dist_name)) + + def test_provider_entry_point_discovery(monkeypatch: pytest.MonkeyPatch) -> None: ep_name = f"ep-{uuid4().hex}" @@ -394,7 +398,7 @@ class EntryPointProvider(FakeSandboxProvider): def fake_entry_points(*, group: str) -> list[SimpleNamespace]: assert group == provider_registry.ENTRY_POINT_GROUP - return [SimpleNamespace(name=ep_name, load=lambda: EntryPointProvider)] + return [_fake_entry_point(ep_name, EntryPointProvider, "pkg-a")] monkeypatch.setattr(provider_registry, "entry_points", fake_entry_points) monkeypatch.setattr(provider_registry, "_ENTRY_POINT_LOADERS", None) @@ -402,13 +406,36 @@ def fake_entry_points(*, group: str) -> list[SimpleNamespace]: assert ep_name in list_providers() assert get_provider_class(ep_name) is EntryPointProvider - # Built-in providers take precedence over an entry point with the same name. + +def test_provider_entry_point_collisions(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture) -> None: + class EntryPointProvider(FakeSandboxProvider): + pass + + # Two distributions publishing the same name raise, naming both packages. + dup_name = f"ep-{uuid4().hex}" + monkeypatch.setattr(provider_registry, "_ENTRY_POINT_LOADERS", None) + monkeypatch.setattr( + provider_registry, + "entry_points", + lambda *, group: [ + _fake_entry_point(dup_name, EntryPointProvider, "pkg-a"), + _fake_entry_point(dup_name, EntryPointProvider, "pkg-b"), + ], + ) + with pytest.raises(ValueError, match=r"Duplicate sandbox provider entry point.*pkg-a.*pkg-b"): + list_providers() + + # An entry point shadowed by a built-in is warned (at discovery) and ignored. monkeypatch.setattr(provider_registry, "_ENTRY_POINT_LOADERS", None) monkeypatch.setattr( provider_registry, "entry_points", - lambda *, group: [SimpleNamespace(name="opensandbox", load=lambda: EntryPointProvider)], + lambda *, group: [_fake_entry_point("opensandbox", EntryPointProvider, "pkg-a")], ) + with caplog.at_level("WARNING", logger=provider_registry.__name__): + list_providers() + assert any("shadowed" in message for message in caplog.messages) + # Built-in still wins on lookup. assert get_provider_class("opensandbox").__name__ == "OpenSandboxProvider" From 0f2fca211f3fec6b962eb313d6151c4a48225db9 Mon Sep 17 00:00:00 2001 From: Ananth Subramaniam Date: Wed, 24 Jun 2026 02:49:39 -0700 Subject: [PATCH 5/6] feat(sandbox): validate OpenSandbox provider_options via a frozen dataclass Represent the recognized per-sandbox create options (spec.provider_options) as a frozen OpenSandboxProviderOptions dataclass with a validating from_mapping, so the supported options and their types are discoverable in one place and unknown keys are rejected with a clear error. The create path now reads typed attributes instead of scattered dict lookups. SDK-owned nested structures (platform, volumes) stay pass-through mappings so their inner fields remain validated by the OpenSandbox SDK rather than over-constrained here. Refs #1377 Signed-off-by: Ananth Subramaniam --- .../sandbox/providers/opensandbox/provider.py | 131 ++++++++++-------- tests/unit_tests/test_opensandbox_provider.py | 44 ++++-- 2 files changed, 113 insertions(+), 62 deletions(-) diff --git a/nemo_gym/sandbox/providers/opensandbox/provider.py b/nemo_gym/sandbox/providers/opensandbox/provider.py index 8bac478ed8..a50f85fb9a 100644 --- a/nemo_gym/sandbox/providers/opensandbox/provider.py +++ b/nemo_gym/sandbox/providers/opensandbox/provider.py @@ -19,7 +19,7 @@ import re import shlex from collections.abc import Mapping -from dataclasses import dataclass, replace +from dataclasses import dataclass, field, replace from datetime import timedelta from pathlib import Path from typing import Any, Awaitable, Callable @@ -92,10 +92,6 @@ class OpenSandboxCreateVerificationError(SandboxCreateVerificationError): DEFAULT_IMAGE_PULL_POLICY = "IfNotPresent" IMAGE_PULL_POLICY_EXTENSION_KEY = "imagePullPolicy" IMAGE_PULL_POLICY_ANNOTATION_EXTENSION_KEY = "opensandbox.extensions.image-pull-policy" -PROVIDER_OPTION_PLATFORM = "platform" -PROVIDER_OPTION_SKIP_HEALTH_CHECK = "skip_health_check" -PROVIDER_OPTION_SNAPSHOT_ID = "snapshot_id" -PROVIDER_OPTION_VOLUMES = "volumes" VALID_IMAGE_PULL_POLICIES = {"Always", "IfNotPresent", "Never"} STATUS_CODE_RE = re.compile(r"(?:status code|http)\D+(\d{3})", re.IGNORECASE) @@ -323,26 +319,6 @@ def _to_volumes(volumes: list[Mapping[str, Any]]) -> list[Any]: return [Volume(**dict(volume)) for volume in volumes] -def _spec_volumes(spec: SandboxSpec) -> list[Mapping[str, Any]] | None: - return spec.provider_options.get(PROVIDER_OPTION_VOLUMES) - - -def _spec_extensions(spec: SandboxSpec) -> dict[str, str]: - value = spec.provider_options.get("extensions", {}) - if not isinstance(value, Mapping): - raise TypeError("OpenSandbox provider option 'extensions' must be a mapping") - return _string_map(dict(value)) - - -def _provider_option_bool(provider_options: dict[str, Any], key: str) -> bool | None: - value = provider_options.get(key) - if value is None: - return None - if not isinstance(value, bool): - raise TypeError(f"OpenSandbox provider option {key!r} must be a bool") - return value - - def _to_sandbox_status(state: Any) -> SandboxStatus: normalized = str(state or "").lower() if normalized in {"active", "ready", "running"}: @@ -453,6 +429,60 @@ def _coerce_config(value: Any, config_cls: type[Any]) -> Any: raise TypeError(f"{config_cls.__name__} must be a mapping or {config_cls.__name__} instance") +@dataclass(frozen=True) +class OpenSandboxProviderOptions: + """Recognized per-sandbox create options read from ``SandboxSpec.provider_options``. + + ``platform`` and ``volumes`` entries are passed through to the OpenSandbox SDK, + so their inner fields are validated by the SDK rather than here. + """ + + platform: Mapping[str, Any] | None = None + snapshot_id: str | None = None + volumes: tuple[Mapping[str, Any], ...] = () + skip_health_check: bool | None = None + extensions: Mapping[str, str] = field(default_factory=dict) + + @classmethod + def from_mapping(cls, options: Mapping[str, Any] | None) -> "OpenSandboxProviderOptions": + if options is None: + return cls() + if not isinstance(options, Mapping): + raise TypeError("OpenSandbox provider_options must be a mapping") + + allowed = set(cls.__dataclass_fields__) + unknown = set(options) - allowed + if unknown: + raise ValueError( + f"Unknown OpenSandbox provider option(s): {', '.join(sorted(unknown))}. " + f"Supported: {', '.join(sorted(allowed))}" + ) + + platform = options.get("platform") + if platform is not None and not isinstance(platform, Mapping): + raise TypeError("OpenSandbox provider option 'platform' must be a mapping") + snapshot_id = options.get("snapshot_id") + if snapshot_id is not None and not isinstance(snapshot_id, str): + raise TypeError("OpenSandbox provider option 'snapshot_id' must be a string") + volumes = options.get("volumes") or () + if not isinstance(volumes, (list, tuple)) or not all(isinstance(volume, Mapping) for volume in volumes): + raise TypeError("OpenSandbox provider option 'volumes' must be a list of mappings") + skip_health_check = options.get("skip_health_check") + if skip_health_check is not None and not isinstance(skip_health_check, bool): + raise TypeError("OpenSandbox provider option 'skip_health_check' must be a bool") + extensions = options.get("extensions", {}) + if not isinstance(extensions, Mapping): + raise TypeError("OpenSandbox provider option 'extensions' must be a mapping") + + return cls( + platform=dict(platform) if platform is not None else None, + snapshot_id=snapshot_id, + volumes=tuple(dict(volume) for volume in volumes), + skip_health_check=skip_health_check, + extensions=_string_map(dict(extensions)), + ) + + class OpenSandboxProvider: """Provider backed by the OpenSandbox SDK/server API.""" @@ -471,23 +501,21 @@ def __init__( self._probe = _coerce_config(probe, OpenSandboxProbeConfig) self._operations = _coerce_config(operations, OpenSandboxOperationConfig) - def _with_default_image_pull_policy(self, spec: SandboxSpec) -> SandboxSpec: - """Ensure SDK create requests carry the desired image pull policy.""" + def _resolve_extensions(self, extensions: Mapping[str, str]) -> dict[str, str]: + """Add the configured default image pull policy to SDK create extensions.""" + resolved = dict(extensions) if self._create.image_pull_policy is None: - return spec + return resolved - provider_options = dict(spec.provider_options) - extensions = _spec_extensions(spec) - image_pull_policy = extensions.get(IMAGE_PULL_POLICY_EXTENSION_KEY) or extensions.get( - IMAGE_PULL_POLICY_ANNOTATION_EXTENSION_KEY + image_pull_policy = ( + resolved.get(IMAGE_PULL_POLICY_EXTENSION_KEY) + or resolved.get(IMAGE_PULL_POLICY_ANNOTATION_EXTENSION_KEY) + or self._create.image_pull_policy ) - if image_pull_policy is None: - image_pull_policy = self._create.image_pull_policy image_pull_policy = validate_image_pull_policy(image_pull_policy) - extensions.setdefault(IMAGE_PULL_POLICY_EXTENSION_KEY, image_pull_policy) - extensions.setdefault(IMAGE_PULL_POLICY_ANNOTATION_EXTENSION_KEY, image_pull_policy) - provider_options["extensions"] = extensions - return replace(spec, provider_options=provider_options) + resolved.setdefault(IMAGE_PULL_POLICY_EXTENSION_KEY, image_pull_policy) + resolved.setdefault(IMAGE_PULL_POLICY_ANNOTATION_EXTENSION_KEY, image_pull_policy) + return resolved def _connection_config( self, @@ -694,37 +722,33 @@ async def _connect_after_create(self, handle: SandboxHandle, spec: SandboxSpec) async def _create_once(self, spec: SandboxSpec) -> SandboxHandle: """Create a sandbox through ``opensandbox.Sandbox.create``.""" Sandbox, _, _, _, _ = _require_opensandbox_sdk() + options = OpenSandboxProviderOptions.from_mapping(spec.provider_options) kwargs: dict[str, Any] = { "env": spec.env, "metadata": spec.metadata, "resource": _resource_map(spec.resources), - "extensions": _spec_extensions(spec), + "extensions": self._resolve_extensions(options.extensions), "connection_config": self._connection_config(request_timeout_s=self._create.request_timeout_s), } if spec.image is not None: kwargs["image"] = spec.image - snapshot_id = spec.provider_options.get(PROVIDER_OPTION_SNAPSHOT_ID) - if snapshot_id is not None: - kwargs["snapshot_id"] = snapshot_id + if options.snapshot_id is not None: + kwargs["snapshot_id"] = options.snapshot_id if spec.ttl_s is not None: kwargs["timeout"] = timedelta(seconds=spec.ttl_s) if spec.ready_timeout_s is not None: kwargs["ready_timeout"] = timedelta(seconds=spec.ready_timeout_s) if spec.entrypoint is not None: kwargs["entrypoint"] = spec.entrypoint - platform = spec.provider_options.get(PROVIDER_OPTION_PLATFORM) - volumes = _spec_volumes(spec) - if platform is not None: - kwargs["platform"] = _to_platform_spec(platform) - if volumes is not None: - kwargs["volumes"] = _to_volumes(volumes) + if options.platform is not None: + kwargs["platform"] = _to_platform_spec(options.platform) + if options.volumes: + kwargs["volumes"] = _to_volumes(list(options.volumes)) if self._create.skip_health_check: kwargs["skip_health_check"] = True - else: - skip_health_check = _provider_option_bool(spec.provider_options, PROVIDER_OPTION_SKIP_HEALTH_CHECK) - if skip_health_check is not None: - kwargs["skip_health_check"] = skip_health_check + elif options.skip_health_check is not None: + kwargs["skip_health_check"] = options.skip_health_check timeout_s = self._create.timeout_s if timeout_s is None and self._connection.request_timeout_s is not None: @@ -790,8 +814,7 @@ async def _create_with_retries( async def create(self, spec: SandboxSpec) -> SandboxHandle: """Create one sandbox through the configured OpenSandbox path.""" - spec = self._with_default_image_pull_policy(_normalize_spec(spec)) - return await self._create_with_retries(spec) + return await self._create_with_retries(_normalize_spec(spec)) async def status(self, handle: SandboxHandle) -> SandboxStatus: """Return the current OpenSandbox lifecycle status.""" diff --git a/tests/unit_tests/test_opensandbox_provider.py b/tests/unit_tests/test_opensandbox_provider.py index c57be62a6d..57fba7327c 100644 --- a/tests/unit_tests/test_opensandbox_provider.py +++ b/tests/unit_tests/test_opensandbox_provider.py @@ -187,11 +187,9 @@ def test_provider_validation_and_retry_helpers() -> None: with pytest.raises(ValueError, match="image_pull_policy"): opensandbox_provider.validate_image_pull_policy("Sometimes") with pytest.raises(TypeError, match="extensions"): - opensandbox_provider._spec_extensions( - SandboxSpec(image="image:tag", provider_options={"extensions": ["not", "a", "mapping"]}) - ) + opensandbox_provider.OpenSandboxProviderOptions.from_mapping({"extensions": ["not", "a", "mapping"]}) with pytest.raises(TypeError, match="must be a bool"): - opensandbox_provider._provider_option_bool({"skip_health_check": "true"}, "skip_health_check") + opensandbox_provider.OpenSandboxProviderOptions.from_mapping({"skip_health_check": "true"}) assert opensandbox_provider._resource_map(SandboxResources(cpu=2.0))["cpu"] == "2" assert opensandbox_provider._to_sandbox_status("starting") == SandboxStatus.STARTING @@ -240,6 +238,38 @@ def test_provider_validation_and_retry_helpers() -> None: assert attrs["next_sleep_s"] == 0.5 +def test_provider_options_from_mapping() -> None: + options_cls = opensandbox_provider.OpenSandboxProviderOptions + + assert options_cls.from_mapping(None) == options_cls() + + parsed = options_cls.from_mapping( + { + "platform": {"os": "linux", "arch": "amd64"}, + "snapshot_id": "snap-1", + "volumes": [{"name": "workspace"}], + "skip_health_check": True, + "extensions": {"imagePullPolicy": "Never"}, + } + ) + assert parsed.platform == {"os": "linux", "arch": "amd64"} + assert parsed.snapshot_id == "snap-1" + assert parsed.volumes == ({"name": "workspace"},) + assert parsed.skip_health_check is True + assert parsed.extensions == {"imagePullPolicy": "Never"} + + with pytest.raises(ValueError, match="Unknown OpenSandbox provider option"): + options_cls.from_mapping({"bogus": 1}) + with pytest.raises(TypeError, match="provider_options must be a mapping"): + options_cls.from_mapping(["not", "a", "mapping"]) + with pytest.raises(TypeError, match="'platform' must be a mapping"): + options_cls.from_mapping({"platform": "linux/amd64"}) + with pytest.raises(TypeError, match="'snapshot_id' must be a string"): + options_cls.from_mapping({"snapshot_id": 123}) + with pytest.raises(TypeError, match="'volumes' must be a list of mappings"): + options_cls.from_mapping({"volumes": ["workspace"]}) + + def test_connection_config_and_image_policy(fake_opensandbox_sdk: None) -> None: provider = opensandbox_provider.OpenSandboxProvider( connection={ @@ -262,14 +292,12 @@ def test_connection_config_and_image_policy(fake_opensandbox_sdk: None) -> None: short_timeout_config = provider._connection_config(request_timeout_s=3) assert short_timeout_config.kwargs["request_timeout"] == timedelta(seconds=3) - spec = SandboxSpec(image="image:tag", provider_options={"extensions": {"imagePullPolicy": "Never"}}) - updated = provider._with_default_image_pull_policy(spec) - extensions = updated.provider_options["extensions"] + extensions = provider._resolve_extensions({"imagePullPolicy": "Never"}) assert extensions["imagePullPolicy"] == "Never" assert extensions["opensandbox.extensions.image-pull-policy"] == "Never" no_policy_provider = opensandbox_provider.OpenSandboxProvider(create={"image_pull_policy": None}) - assert no_policy_provider._with_default_image_pull_policy(spec) is spec + assert no_policy_provider._resolve_extensions({"imagePullPolicy": "Never"}) == {"imagePullPolicy": "Never"} async def test_exec_file_operations_and_reference_validation(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: From ced8a3a037f223dd0a6af78123360c355aa01c97 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Fri, 26 Jun 2026 22:54:47 -0700 Subject: [PATCH 6/6] feat(sandbox): integrate Apptainer provider stack Add the named Apptainer config and align provider options with the sandbox provider stack. Signed-off-by: Hemil Desai --- nemo_gym/sandbox/__init__.py | 7 +- nemo_gym/sandbox/config.py | 49 +++++++-- .../sandbox/providers/apptainer/README.md | 100 +++++++++++++----- .../apptainer/configs/apptainer.yaml | 43 ++++++++ .../sandbox/providers/apptainer/provider.py | 58 ++++++---- .../opensandbox/configs/opensandbox.yaml | 8 +- pyproject.toml | 2 +- .../mini_swe_agent_2/README.md | 26 +++-- responses_api_agents/mini_swe_agent_2/app.py | 11 +- .../configs/mini_swe_agent_2.yaml | 9 +- .../mini_swe_agent_2/tests/test_app.py | 69 ++++++++++++ tests/unit_tests/test_apptainer_provider.py | 76 ++++++++++++- tests/unit_tests/test_sandbox.py | 19 +++- 13 files changed, 404 insertions(+), 73 deletions(-) create mode 100644 nemo_gym/sandbox/providers/apptainer/configs/apptainer.yaml diff --git a/nemo_gym/sandbox/__init__.py b/nemo_gym/sandbox/__init__.py index e85f5728db..9ecf27b958 100644 --- a/nemo_gym/sandbox/__init__.py +++ b/nemo_gym/sandbox/__init__.py @@ -15,7 +15,11 @@ """Public sandbox API for NeMo Gym.""" from nemo_gym.sandbox.api import AsyncSandbox, Sandbox -from nemo_gym.sandbox.config import resolve_provider_config, resolve_provider_metadata +from nemo_gym.sandbox.config import ( + resolve_provider_config, + resolve_provider_default_options, + resolve_provider_metadata, +) from nemo_gym.sandbox.providers import ( ExecResult, SandboxCreateError, @@ -51,6 +55,7 @@ "list_providers", "register_provider", "resolve_provider_config", + "resolve_provider_default_options", "resolve_provider_metadata", "rewrite_image", ] diff --git a/nemo_gym/sandbox/config.py b/nemo_gym/sandbox/config.py index c0da2d73bd..74fca510ac 100644 --- a/nemo_gym/sandbox/config.py +++ b/nemo_gym/sandbox/config.py @@ -29,14 +29,16 @@ An inline single-key mapping (``{provider_name: {...}}``) is also accepted for keeping everything in one file. -A block may also carry a reserved ``default_metadata`` key. Its entries are merged -into the sandbox's ``SandboxSpec.metadata`` as defaults (the agent's own -``sandbox_spec.metadata`` overrides them), so provider-identifying tags live with -the provider rather than the agent config:: +A block may also carry reserved ``default_metadata`` and +``default_provider_options`` keys. Their top-level entries are shallow-merged +into the sandbox's ``SandboxSpec.metadata`` and ``SandboxSpec.provider_options`` +respectively (the agent's own ``sandbox_spec`` entries override them), so +provider-owned defaults live with the provider rather than the agent config:: sandbox: opensandbox: { connection: { ... } } default_metadata: { sandbox-api: opensandbox-sdk } + default_provider_options: { platform: { os: linux, arch: amd64 } } """ from collections.abc import Mapping @@ -45,7 +47,13 @@ # Reserved keys inside a sandbox block that are not the provider config. SANDBOX_BLOCK_DEFAULT_METADATA_KEY = "default_metadata" -SANDBOX_BLOCK_RESERVED_KEYS = frozenset({SANDBOX_BLOCK_DEFAULT_METADATA_KEY}) +SANDBOX_BLOCK_DEFAULT_PROVIDER_OPTIONS_KEY = "default_provider_options" +SANDBOX_BLOCK_RESERVED_KEYS = frozenset( + { + SANDBOX_BLOCK_DEFAULT_METADATA_KEY, + SANDBOX_BLOCK_DEFAULT_PROVIDER_OPTIONS_KEY, + } +) def _to_plain_dict(value: Any) -> Any: @@ -124,8 +132,9 @@ def resolve_provider_config( Returns: A plain ``{provider_name: provider_kwargs}`` dict suitable for :func:`nemo_gym.sandbox.create_provider`. Reserved keys such as - ``default_metadata`` are excluded; read them with - :func:`resolve_provider_metadata`. + ``default_metadata`` and ``default_provider_options`` are excluded; read + them with :func:`resolve_provider_metadata` and + :func:`resolve_provider_default_options`. Raises: TypeError: If ``sandbox_provider`` is neither a string nor a mapping. @@ -166,3 +175,29 @@ def resolve_provider_metadata( f"Sandbox '{SANDBOX_BLOCK_DEFAULT_METADATA_KEY}' from {source} must be a mapping, got: {metadata!r}" ) return dict(metadata) + + +def resolve_provider_default_options( + sandbox_provider: str | Mapping[str, Any], + named_configs: Mapping[str, Any] | None = None, +) -> dict[str, Any]: + """Return a sandbox block's ``default_provider_options``. + + These provider-owned defaults are shallow-merged by top-level key into + ``SandboxSpec.provider_options``; explicit options from the agent's + ``sandbox_spec`` take precedence. Returns an empty dict when the block has no + ``default_provider_options`` key. See :func:`resolve_provider_config` for + argument semantics. + """ + block, source = _resolve_block(sandbox_provider, named_configs) + if not isinstance(block, Mapping): + raise ValueError(f"Sandbox provider config from {source} must be a mapping, got: {block!r}") + + options = block.get(SANDBOX_BLOCK_DEFAULT_PROVIDER_OPTIONS_KEY) + if options is None: + return {} + if not isinstance(options, Mapping): + raise ValueError( + f"Sandbox '{SANDBOX_BLOCK_DEFAULT_PROVIDER_OPTIONS_KEY}' from {source} must be a mapping, got: {options!r}" + ) + return dict(options) diff --git a/nemo_gym/sandbox/providers/apptainer/README.md b/nemo_gym/sandbox/providers/apptainer/README.md index ca7b36f2a2..66f749edcb 100644 --- a/nemo_gym/sandbox/providers/apptainer/README.md +++ b/nemo_gym/sandbox/providers/apptainer/README.md @@ -72,30 +72,62 @@ async def run(): ## Selecting and configuring the provider -The provider config is a single-key mapping: `{"apptainer": {}}`. The kwargs are -grouped into three optional sections, each of which accepts a plain mapping (e.g. from -Hydra YAML) or the corresponding dataclass: +For Hydra-composed agents, use the bundled named provider config: + +```bash +AGENT=responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml +MODEL=responses_api_models/vllm_model/configs/vllm_model.yaml +PROVIDER=nemo_gym/sandbox/providers/apptainer/configs/apptainer.yaml +ng_run "+config_paths=[$AGENT,$PROVIDER,$MODEL]" +``` + +The config binds the conventional name `sandbox`, which the agent selects with +`sandbox_provider: sandbox`. It can therefore replace the OpenSandbox config by changing +only the provider path. `default_metadata` is merged into each `SandboxSpec.metadata`, +with the agent's explicit metadata taking precedence: ```yaml -# Provider config (the value passed as the sandbox provider) -apptainer: - exec: - fakeroot_for_root: true - default_binds: ["/tmp"] - extra_exec_args: ["--writable-tmpfs"] - default_timeout_s: 180 - concurrency: 32 - create: - mount_point: /sandbox - start_timeout_s: 600 - extra_start_args: [] - apply_resource_limits: true - probe: - command: printf apptainer-sandbox-ready - expected_stdout: apptainer-sandbox-ready - deadline_s: 120 +sandbox: + default_metadata: + sandbox-api: apptainer-cli + apptainer: + exec: + fakeroot_for_root: false # instance start establishes fakeroot below + default_binds: [] + extra_exec_args: ["--cleanenv"] + default_timeout_s: 180 + concurrency: 32 + create: + mount_point: /sandbox + start_timeout_s: 1200 + extra_start_args: ["--containall", "--cleanenv", "--fakeroot", "--writable-tmpfs"] + apply_resource_limits: false + probe: + command: printf apptainer-sandbox-ready + expected_stdout: apptainer-sandbox-ready + timeout_s: 30 + deadline_s: 120 + stable_count: 1 + stable_delay_s: 0.0 ``` +The value under `apptainer` is the provider constructor config. Its three optional +sections accept either a plain mapping (for example, from Hydra YAML) or the corresponding +dataclass. Direct Python callers pass the resolved single-key mapping +`{"apptainer": {}}` to `Sandbox` or `AsyncSandbox`, as shown above. + +The bundled config uses `--containall` and `--cleanenv` to suppress Apptainer's standard +host home/tmp binds and ordinary host environment inheritance. It also starts with +fakeroot and an ephemeral writable overlay so Mini SWE commands can run as root and +persistent edits can modify an otherwise read-only SIF image. These flags must be applied +by `instance start`; adding them only to `exec instance://...` cannot change an existing +instance's mount/user namespace. The profile therefore disables CPU/memory cgroup flags, +which Apptainer cannot combine with fakeroot in this provider. A host must support +fakeroot and writable-tmpfs overlays; adjust `create.extra_start_args` for writable +sandbox images or hosts without those features. Apptainer's `sessiondir max size` +controls writable-tmpfs capacity; `SandboxSpec.disk_gib` does not resize it, so raise the +host limit for Mini SWE workloads. + ### `create` — `ApptainerCreateConfig` Settings for starting the instance (`apptainer instance start`). @@ -114,9 +146,9 @@ Settings for running commands (`apptainer exec`) and global provider behavior. | Field | Default | Meaning | |---|---|---| | `default_timeout_s` | `180` | Default per-command timeout when the caller doesn't pass one (`None` = no timeout). | -| `fakeroot_for_root` | `true` | When running as root, add `--fakeroot` (map the host user to root inside the container). | +| `fakeroot_for_root` | `true` | When `user` is root, request `--fakeroot` on exec. The instance must also have been started with `--fakeroot`; the bundled config does this. | | `default_binds` | `[]` | Extra `--bind host:container` mounts added at instance start. | -| `extra_exec_args` | `[]` | Extra raw flags appended to every `apptainer exec` (e.g. `--no-home`, `--writable-tmpfs`, `--contain`). | +| `extra_exec_args` | `[]` | Extra raw flags appended to every `apptainer exec` (e.g. `--cleanenv` or `--no-eval`). Mount/user namespace flags belong in `create.extra_start_args`. | | `concurrency` | `32` | Upper bound on concurrent `apptainer` subprocesses (shared semaphore). | ### `probe` — `ApptainerProbeConfig` @@ -145,9 +177,19 @@ The spec is provider-neutral; the Apptainer provider uses these fields: | `workdir` | Default working directory for `exec` (applied as `--pwd`). | | `files` | Seed files written into the sandbox at `start()` (handled by the sandbox API via `upload`). | | `resources` | Mapped to cgroup flags (see below). | -| `provider_options` | `binds`: a `"src:dst[:opts]"` string or list of them — extra per-sandbox `--bind` mounts added at instance start (on top of the staging mount and `exec.default_binds`). | +| `provider_options` | Validated by `ApptainerProviderOptions`; currently supports only `binds` (see below). | | `ttl_s` | **Not supported** — ignored with a warning. Tear down via `stop()`/`close()` instead. | +### Per-sandbox provider options — `ApptainerProviderOptions` + +`SandboxSpec.provider_options` is parsed into a frozen dataclass before any instance +resources are allocated. The only supported option is `binds`: either one +`"src:dst[:opts]"` string or a list/tuple of strings. These per-sandbox mounts are added +at instance start after the staging mount and `exec.default_binds`. + +Unknown keys, a non-mapping `provider_options` value, and non-string bind entries raise a +clear validation error instead of being silently ignored. + ## How it works ### Lifecycle: one persistent instance per sandbox @@ -256,8 +298,13 @@ failed" rather than "the command exited 125". per relevant create). Manage lifetime with `stop()` / `close()`. - **Numeric uids.** The `su`-based user switch expects a *username*; a bare numeric uid may not resolve. Prefer named users. -- **`--fakeroot` on exec.** Whether `--fakeroot` works on `exec` into an instance that - was started *without* fakeroot varies by Apptainer version and host configuration. +- **Fakeroot is fixed at instance start.** `exec instance://...` cannot retrofit a user + namespace onto a non-fakeroot instance; put `--fakeroot` in + `create.extra_start_args` when commands must run as root. +- **Apptainer launcher variables remain significant.** `APPTAINER_BIND`, + `APPTAINER_BINDPATH`, `APPTAINER_MOUNT`, `APPTAINERENV_*`, and legacy Singularity + aliases can inject mounts or environment values before container-side `--cleanenv` + applies. Sanitize these in the worker/launcher environment when running untrusted code. - **Resource enforcement.** CPU/memory cgroup flags require cgroups v2 delegation. Disable them with `create.apply_resource_limits: false`. - **Runtime-failure detection is heuristic.** It keys off stderr markers, so a user @@ -265,7 +312,8 @@ failed" rather than "the command exited 125". ## Development -Source: [`provider.py`](./provider.py). The provider implements the +Source: [`provider.py`](./provider.py); the bundled named config is +[`configs/apptainer.yaml`](./configs/apptainer.yaml). The provider implements the `SandboxProvider` protocol from [`../base.py`](../base.py) structurally (no subclassing) and is registered under the name `apptainer` in [`../registry.py`](../registry.py). diff --git a/nemo_gym/sandbox/providers/apptainer/configs/apptainer.yaml b/nemo_gym/sandbox/providers/apptainer/configs/apptainer.yaml new file mode 100644 index 0000000000..2f1ba8a0a6 --- /dev/null +++ b/nemo_gym/sandbox/providers/apptainer/configs/apptainer.yaml @@ -0,0 +1,43 @@ +# Apptainer sandbox provider config. +# +# `sandbox` is the instance name an agent references via `sandbox_provider: +# sandbox`; the child key `apptainer` selects the provider class and its value +# is passed to the provider constructor. +# +# Every shipped provider config binds the same name `sandbox`, so swapping +# providers is swapping this config path in `+config_paths` (no agent edit): +# +# AGENT=responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml +# MODEL=responses_api_models/vllm_model/configs/vllm_model.yaml +# ng_run "+config_paths=[$AGENT, nemo_gym/sandbox/providers/apptainer/configs/apptainer.yaml, $MODEL]" +# +# To run multiple sandboxes at once, give each block a distinct instance name +# (e.g. `apptainer_foo`, `apptainer_baz`) and reference each by name. +# +# `default_metadata` (optional) is merged into every sandbox's spec metadata +# (SandboxSpec.metadata); an agent's own sandbox_spec.metadata overrides it. +sandbox: + default_metadata: + sandbox-api: apptainer-cli + apptainer: + exec: + default_timeout_s: 180 + # The instance itself starts with --fakeroot below; repeating it while + # joining instance:// is ignored by Apptainer. + fakeroot_for_root: false + default_binds: [] + extra_exec_args: ["--cleanenv"] + concurrency: 32 + create: + mount_point: /sandbox + start_timeout_s: 1200 + extra_start_args: ["--containall", "--cleanenv", "--fakeroot", "--writable-tmpfs"] + # This provider skips CPU/memory cgroup flags when starting with --fakeroot. + apply_resource_limits: false + probe: + command: printf apptainer-sandbox-ready + expected_stdout: apptainer-sandbox-ready + timeout_s: 30 + deadline_s: 120 + stable_count: 1 + stable_delay_s: 0.0 diff --git a/nemo_gym/sandbox/providers/apptainer/provider.py b/nemo_gym/sandbox/providers/apptainer/provider.py index 0605b1a805..7d508b2145 100644 --- a/nemo_gym/sandbox/providers/apptainer/provider.py +++ b/nemo_gym/sandbox/providers/apptainer/provider.py @@ -68,7 +68,7 @@ def _require_apptainer() -> str: if path is None: raise RuntimeError( "The 'apptainer' binary is required for the apptainer sandbox provider. " - "Install Apptainer before using env.sandbox.provider.name=apptainer." + "Install Apptainer before selecting the 'apptainer' sandbox provider." ) return path @@ -139,6 +139,40 @@ def __post_init__(self) -> None: raise ValueError("probe.stable_delay_s must be >= 0") +@dataclass(frozen=True) +class ApptainerProviderOptions: + """Recognized per-sandbox create options read from ``SandboxSpec.provider_options``.""" + + binds: tuple[str, ...] = () + + @classmethod + def from_mapping(cls, options: Mapping[str, Any] | None) -> "ApptainerProviderOptions": + if options is None: + return cls() + if not isinstance(options, Mapping): + raise TypeError("Apptainer provider_options must be a mapping") + + allowed = set(cls.__dataclass_fields__) + unknown = set(options) - allowed + if unknown: + raise ValueError( + f"Unknown Apptainer provider option(s): {', '.join(sorted(unknown))}. " + f"Supported: {', '.join(sorted(allowed))}" + ) + + binds = options.get("binds") + if binds is None: + normalized_binds: tuple[str, ...] = () + elif isinstance(binds, str): + normalized_binds = (binds,) + elif isinstance(binds, (list, tuple)) and all(isinstance(bind, str) for bind in binds): + normalized_binds = tuple(binds) + else: + raise TypeError("Apptainer provider option 'binds' must be a string or list/tuple of strings") + + return cls(binds=normalized_binds) + + @dataclass class _ApptainerInstance: """Provider-private state stashed on SandboxHandle.raw.""" @@ -220,22 +254,6 @@ def _is_missing_instance(stderr: str) -> bool: return any(marker in low for marker in APPTAINER_MISSING_INSTANCE_MARKERS) -def _coerce_binds(value: Any) -> list[str]: - """Normalize ``spec.provider_options['binds']`` into a list of bind strings. - - Accepts a single ``"src:dst[:opts]"`` string or a list of them. These are - extra per-sandbox bind mounts, added on top of the staging mount and the - provider-level ``exec.default_binds``. - """ - if value is None: - return [] - if isinstance(value, str): - return [value] - if isinstance(value, (list, tuple)): - return [str(v) for v in value] - raise ApptainerCreateError(f"provider_options['binds'] must be a string or list, got {type(value).__name__}") - - class ApptainerProvider: """Sandbox provider backed by the local Apptainer CLI.""" @@ -346,7 +364,7 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle: mount_point = self._create_config.mount_point, generate a unique name = INSTANCE_NAME_PREFIX + uuid4().hex. 4. Build argv: [binary, "instance", "start", <--bind staging:mount_point>, - , , <--env ...>, + , , <--env ...>, _resource_flags(spec.resources), , image, name]. 5. await self._run(argv, timeout_s=self._create_config.start_timeout_s); on non-zero return, clean up the staging dir and raise @@ -367,7 +385,7 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle: image = _resolve_image(spec.image) # Extra per-sandbox bind mounts (validated before we allocate anything). - extra_binds = _coerce_binds(spec.provider_options.get("binds")) + options = ApptainerProviderOptions.from_mapping(spec.provider_options) # host staging dir (bind-mounted in), mount point, unique name. mount_point = self._create_config.mount_point @@ -381,7 +399,7 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle: argv += ["--bind", f"{staging_dir}:{mount_point}"] for bind in self._exec_config.default_binds: argv += ["--bind", bind] - for bind in extra_binds: + for bind in options.binds: argv += ["--bind", bind] for key, value in spec.env.items(): argv += ["--env", f"{key}={value}"] diff --git a/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml b/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml index 708ceac31c..12a2f678a4 100644 --- a/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml +++ b/nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml @@ -14,11 +14,15 @@ # To run multiple sandboxes at once, give each block a distinct instance name # (e.g. `opensandbox_foo`, `opensandbox_baz`) and reference each by name. # -# `default_metadata` (optional) is merged into every sandbox's spec metadata -# (SandboxSpec.metadata); an agent's own sandbox_spec.metadata overrides it. +# `default_metadata` and `default_provider_options` (optional) are shallow- +# merged by top-level key into every SandboxSpec; agent values override them. sandbox: default_metadata: sandbox-api: opensandbox-sdk + default_provider_options: + platform: + os: linux + arch: amd64 opensandbox: connection: domain: ${oc.env:OPENSANDBOX_DOMAIN,opensandbox-server.opensandbox-system.svc.cluster.local} diff --git a/pyproject.toml b/pyproject.toml index 83caf14ff1..b532148566 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -329,7 +329,7 @@ exclude-dependencies = [ ] [tool.setuptools.package-data] -nemo_gym = ["resources/*.py"] +nemo_gym = ["resources/*.py", "sandbox/providers/*/configs/*.yaml"] [tool.setuptools.dynamic] version = {attr = "nemo_gym.__version__"} diff --git a/responses_api_agents/mini_swe_agent_2/README.md b/responses_api_agents/mini_swe_agent_2/README.md index 546c67efef..5687f1947e 100644 --- a/responses_api_agents/mini_swe_agent_2/README.md +++ b/responses_api_agents/mini_swe_agent_2/README.md @@ -162,10 +162,6 @@ mini_swe_agent_2: cpu: 2 memory_mib: 8192 disk_gib: 20 - provider_options: - platform: - os: linux - arch: amd64 metadata: benchmark: swebench-verified harness: mini-swe-agent @@ -197,6 +193,10 @@ Path - `nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml` sandbox: # name referenced by the agent's sandbox_provider default_metadata: # optional: merged into sandbox spec metadata (see below) sandbox-api: opensandbox-sdk + default_provider_options: # optional: merged into sandbox provider_options + platform: + os: linux + arch: amd64 opensandbox: # provider registry key -> provider class connection: domain: ${oc.env:OPENSANDBOX_DOMAIN,opensandbox-server.opensandbox-system.svc.cluster.local} @@ -229,12 +229,24 @@ To use a different provider, add a config file under `sandbox` block (the name the agent references) with that provider's registry key, then point `+config_paths` at it instead — no agent edit required. +Apptainer ships such a config at +`nemo_gym/sandbox/providers/apptainer/configs/apptainer.yaml`. Select it by replacing +the OpenSandbox provider path in `+config_paths`; the host must have the `apptainer` +binary installed. See the [Apptainer provider README](../../nemo_gym/sandbox/providers/apptainer/README.md) +for its runtime requirements and configuration knobs. + An optional `default_metadata` key holds provider-contributed defaults that are merged into each sandbox's spec metadata (`SandboxSpec.metadata`); the agent's own `sandbox_spec.metadata` overrides them on conflict. This keeps provider-identifying tags (e.g. `sandbox-api: opensandbox-sdk`) with the provider rather than in the agent config. +Likewise, optional `default_provider_options` are shallow-merged by top-level key into +`SandboxSpec.provider_options`, with explicit agent options winning. Nested mappings are +replaced rather than deep-merged. This keeps provider-specific defaults such as +OpenSandbox's Linux/AMD64 platform in the provider file while the agent config remains +portable to Apptainer. + To ship a custom provider class from a separate package, register it under the `nemo_gym.sandbox_providers` entry point group so it is available on install: @@ -407,8 +419,10 @@ environment: connection: ... spec: resources: ... - provider_options: - platform: ... + provider_options: # contributed by the selected OpenSandbox config + platform: + os: linux + arch: amd64 metadata: ... ``` diff --git a/responses_api_agents/mini_swe_agent_2/app.py b/responses_api_agents/mini_swe_agent_2/app.py index 7cea632365..b78c22e429 100644 --- a/responses_api_agents/mini_swe_agent_2/app.py +++ b/responses_api_agents/mini_swe_agent_2/app.py @@ -47,7 +47,7 @@ NeMoGymResponseCreateParamsNonStreaming, ) from nemo_gym.reward_profile import compute_pass_majority_metrics, highest_k_metrics -from nemo_gym.sandbox import resolve_provider_config, resolve_provider_metadata +from nemo_gym.sandbox import resolve_provider_config, resolve_provider_default_options, resolve_provider_metadata from nemo_gym.server_utils import ( ServerClient, get_first_server_config_dict, @@ -751,6 +751,9 @@ async def run(self, body: MiniSWEAgentRunRequest) -> MiniSWEAgentVerifyResponse: if self.config.sandbox_provider is None: raise ValueError("mini_swe_agent_2 requires sandbox_provider") resolved_sandbox_provider = resolve_provider_config(self.config.sandbox_provider, global_config_dict) + provider_default_options = resolve_provider_default_options( + self.config.sandbox_provider, global_config_dict + ) provider_default_metadata = resolve_provider_metadata(self.config.sandbox_provider, global_config_dict) config.setdefault("environment", {}).update(self.config.sandbox_environment_kwargs or {}) config["environment"]["provider"] = _sandbox_provider_for_config_dump(resolved_sandbox_provider) @@ -762,6 +765,12 @@ async def run(self, body: MiniSWEAgentRunRequest) -> MiniSWEAgentVerifyResponse: if provider_default_metadata: # Provider defaults first; the agent's own spec metadata wins on conflict. instance_spec["metadata"] = {**provider_default_metadata, **(instance_spec.get("metadata") or {})} + if provider_default_options: + # Provider defaults first; the agent's own provider_options win on conflict. + instance_spec["provider_options"] = { + **provider_default_options, + **(instance_spec.get("provider_options") or {}), + } config["environment"]["spec"] = instance_spec should_write_config = True diff --git a/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml b/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml index 9f225012aa..50b16efbd7 100644 --- a/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml +++ b/responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml @@ -5,7 +5,8 @@ # # AGENT=responses_api_agents/mini_swe_agent_2/configs/mini_swe_agent_2.yaml # MODEL=responses_api_models/vllm_model/configs/vllm_model.yaml -# ng_run "+config_paths=[$AGENT, nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml, $MODEL]" +# PROVIDER=nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml +# ng_run "+config_paths=[$AGENT, $PROVIDER, $MODEL]" mini_swe_agent_2: responses_api_agents: mini_swe_agent_2: @@ -19,7 +20,7 @@ mini_swe_agent_2: concurrency: 64 env: sandbox # Name of the sandbox to use; include a provider config that defines a - # `sandbox` block (e.g. nemo_gym/sandbox/providers/opensandbox/configs/opensandbox.yaml). + # `sandbox` block (e.g. a config under nemo_gym/sandbox/providers/). sandbox_provider: sandbox sandbox_spec: ttl_s: 18000 @@ -28,10 +29,6 @@ mini_swe_agent_2: cpu: 2 memory_mib: 8192 disk_gib: 20 - provider_options: - platform: - os: linux - arch: amd64 metadata: benchmark: swebench-verified harness: mini-swe-agent diff --git a/responses_api_agents/mini_swe_agent_2/tests/test_app.py b/responses_api_agents/mini_swe_agent_2/tests/test_app.py index 00bed9871c..b363b6d8a2 100644 --- a/responses_api_agents/mini_swe_agent_2/tests/test_app.py +++ b/responses_api_agents/mini_swe_agent_2/tests/test_app.py @@ -784,6 +784,12 @@ async def test_run_resolves_named_sandbox_provider_reference( monkeypatch.chdir(tmp_path) config = create_test_config() config.sandbox_provider = "sandbox" + config.sandbox_spec = { + "provider_options": { + "platform": {"os": "linux", "arch": "arm64"}, + "extensions": {"trace": "enabled"}, + } + } mock_server_client = MagicMock(spec=ServerClient) server = MiniSWEAgent(config=config, server_client=mock_server_client) @@ -792,6 +798,10 @@ async def test_run_resolves_named_sandbox_provider_reference( "policy_model_name": "test_model", "sandbox": { "default_metadata": {"sandbox-api": "opensandbox-sdk"}, + "default_provider_options": { + "platform": {"os": "linux", "arch": "amd64"}, + "skip_health_check": True, + }, "opensandbox": { "connection": { "domain": "sandbox.example", @@ -817,6 +827,65 @@ async def test_run_resolves_named_sandbox_provider_reference( assert "api_key" not in provider["connection"] # Provider default_metadata flows into the sandbox spec metadata. assert generated_config["environment"]["spec"]["metadata"]["sandbox-api"] == "opensandbox-sdk" + # Provider defaults fill missing values; explicit agent options win on conflict. + assert generated_config["environment"]["spec"]["provider_options"] == { + "platform": {"os": "linux", "arch": "arm64"}, + "skip_health_check": True, + "extensions": {"trace": "enabled"}, + } + + @patch("responses_api_agents.mini_swe_agent_2.app.ServerClient.load_from_global_config") + @patch("responses_api_agents.mini_swe_agent_2.app.get_first_server_config_dict") + @patch("responses_api_agents.mini_swe_agent_2.app.get_config_path") + @patch("responses_api_agents.mini_swe_agent_2.app.runner_ray_remote") + @patch("asyncio.to_thread") + async def test_run_resolves_bundled_apptainer_provider_reference( + self, + mock_to_thread, + mock_runner_ray_remote, + mock_get_config_path, + mock_get_first_server_config_dict, + mock_load_from_global_config, + tmp_path, + monkeypatch, + ) -> None: + monkeypatch.chdir(tmp_path) + config = create_test_config() + config.sandbox_provider = "sandbox" + mock_server_client = MagicMock(spec=ServerClient) + server = MiniSWEAgent(config=config, server_client=mock_server_client) + + provider_config_path = ( + Path(__file__).parents[3] + / "nemo_gym" + / "sandbox" + / "providers" + / "apptainer" + / "configs" + / "apptainer.yaml" + ) + bundled_provider_config = yaml.safe_load(provider_config_path.read_text()) + mock_server_client_instance = MagicMock() + mock_server_client_instance.global_config_dict = { + "policy_model_name": "test_model", + **bundled_provider_config, + } + mock_load_from_global_config.return_value = mock_server_client_instance + mock_get_first_server_config_dict.return_value = {"host": "0.0.0.0", "port": 8080} + setup_config_path_mock(mock_get_config_path) + setup_run_mini_swe_mock(mock_to_thread, mock_runner_ray_remote) + + await server.run(create_run_request()) + + mock_runner_ray_remote.options.assert_not_called() + call_args = mock_runner_ray_remote.remote.call_args + params = call_args.args[1] + generated_config = yaml.safe_load(Path(params["config"]).read_text()) + assert generated_config["environment"]["provider"] == { + "apptainer": bundled_provider_config["sandbox"]["apptainer"] + } + assert generated_config["environment"]["spec"]["metadata"]["sandbox-api"] == "apptainer-cli" + assert "provider_options" not in generated_config["environment"]["spec"] @patch("responses_api_agents.mini_swe_agent_2.app.ServerClient.load_from_global_config") @patch("responses_api_agents.mini_swe_agent_2.app.get_first_server_config_dict") diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py index fea5f8ae82..39da0e1951 100644 --- a/tests/unit_tests/test_apptainer_provider.py +++ b/tests/unit_tests/test_apptainer_provider.py @@ -132,6 +132,66 @@ def test_config_validation() -> None: assert apptainer_provider.ApptainerProbeConfig(command=None, timeout_s=0).command is None +def test_provider_options_from_mapping() -> None: + options_cls = apptainer_provider.ApptainerProviderOptions + + assert options_cls.from_mapping(None) == options_cls() + assert options_cls.from_mapping({"binds": "/host:/container"}).binds == ("/host:/container",) + assert options_cls.from_mapping({"binds": ["/a:/a", "/b:/b:ro"]}).binds == ( + "/a:/a", + "/b:/b:ro", + ) + assert options_cls.from_mapping({"binds": ("/a:/a", "/b:/b:ro")}).binds == ( + "/a:/a", + "/b:/b:ro", + ) + assert options_cls.from_mapping({"binds": None}).binds == () + + with pytest.raises(ValueError, match="Unknown Apptainer provider option"): + options_cls.from_mapping({"bogus": True}) + with pytest.raises(TypeError, match="provider_options must be a mapping"): + options_cls.from_mapping(["not", "a", "mapping"]) + with pytest.raises(TypeError, match="'binds' must be a string or list/tuple of strings"): + options_cls.from_mapping({"binds": 123}) + with pytest.raises(TypeError, match="'binds' must be a string or list/tuple of strings"): + options_cls.from_mapping({"binds": ["/host:/container", 123]}) + + +def test_bundled_provider_config(fake_binary: str) -> None: + from omegaconf import OmegaConf + + from nemo_gym.sandbox import ( + resolve_provider_config, + resolve_provider_default_options, + resolve_provider_metadata, + ) + + config_path = Path(apptainer_provider.__file__).parent / "configs" / "apptainer.yaml" + agent_config_path = ( + Path(__file__).parents[2] / "responses_api_agents" / "mini_swe_agent_2" / "configs" / "mini_swe_agent_2.yaml" + ) + config = OmegaConf.merge(OmegaConf.load(config_path), OmegaConf.load(agent_config_path)) + agent_config = config.mini_swe_agent_2.responses_api_agents.mini_swe_agent_2 + + assert agent_config.sandbox_provider == "sandbox" + assert "provider_options" not in agent_config.sandbox_spec + resolved = resolve_provider_config(agent_config.sandbox_provider, config) + assert resolve_provider_metadata(agent_config.sandbox_provider, config) == {"sandbox-api": "apptainer-cli"} + assert resolve_provider_default_options(agent_config.sandbox_provider, config) == {} + + provider = apptainer_provider.ApptainerProvider(**resolved["apptainer"]) + assert provider._exec_config == apptainer_provider.ApptainerExecConfig( + fakeroot_for_root=False, + extra_exec_args=["--cleanenv"], + ) + assert provider._create_config == apptainer_provider.ApptainerCreateConfig( + start_timeout_s=1200, + extra_start_args=["--containall", "--cleanenv", "--fakeroot", "--writable-tmpfs"], + apply_resource_limits=False, + ) + assert provider._probe == apptainer_provider.ApptainerProbeConfig(deadline_s=120) + + def test_resource_flags() -> None: flags = apptainer_provider._resource_flags( SandboxResources(cpu=2, memory_mib=1024, gpu=1, disk_gib=50, gpu_type="h100") @@ -309,10 +369,24 @@ def responder(argv: list[str]) -> tuple[int, str, str]: async def test_create_extra_binds_invalid_type_raises(fake_binary: str, monkeypatch: pytest.MonkeyPatch) -> None: provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", "")) - with pytest.raises(apptainer_provider.ApptainerCreateError, match="must be a string or list"): + with pytest.raises(TypeError, match="must be a string or list/tuple of strings"): await provider.create(SandboxSpec(image="docker://img", provider_options={"binds": 123})) +async def test_create_rejects_unknown_options_before_allocating( + fake_binary: str, monkeypatch: pytest.MonkeyPatch +) -> None: + provider, rec = _make_provider(monkeypatch, lambda argv: (0, "", "")) + + def unexpected_mkdtemp(*_args: Any, **_kwargs: Any) -> str: + raise AssertionError("mkdtemp must not run for invalid provider options") + + monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", unexpected_mkdtemp) + with pytest.raises(ValueError, match="Unknown Apptainer provider option"): + await provider.create(SandboxSpec(image="docker://img", provider_options={"platform": {}})) + assert rec.calls == [] + + async def test_create_requires_image(fake_binary: str, monkeypatch: pytest.MonkeyPatch) -> None: provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", "")) with pytest.raises(apptainer_provider.ApptainerCreateError, match="image is required"): diff --git a/tests/unit_tests/test_sandbox.py b/tests/unit_tests/test_sandbox.py index 46f0e457d2..e4539523b0 100644 --- a/tests/unit_tests/test_sandbox.py +++ b/tests/unit_tests/test_sandbox.py @@ -38,6 +38,7 @@ list_providers, register_provider, resolve_provider_config, + resolve_provider_default_options, resolve_provider_metadata, ) from nemo_gym.sandbox.api import _AsyncLoopRunner @@ -509,25 +510,39 @@ def test_resolve_provider_config_errors() -> None: resolve_provider_config("sandbox_main", {"sandbox_main": {}}) -def test_resolve_provider_metadata() -> None: +def test_resolve_provider_defaults() -> None: block = { "opensandbox": {"connection": {"domain": "sandbox.example"}}, "default_metadata": {"sandbox-api": "opensandbox-sdk"}, + "default_provider_options": {"platform": {"os": "linux", "arch": "amd64"}}, } # default_metadata is excluded from the provider config and read separately. assert resolve_provider_config(block) == {"opensandbox": {"connection": {"domain": "sandbox.example"}}} assert resolve_provider_metadata(block) == {"sandbox-api": "opensandbox-sdk"} + assert resolve_provider_default_options(block) == { + "platform": {"os": "linux", "arch": "amd64"}, + } # A named reference works the same way. global_config = {"sandbox": block} assert resolve_provider_metadata("sandbox", global_config) == {"sandbox-api": "opensandbox-sdk"} + assert resolve_provider_default_options("sandbox", global_config) == { + "platform": {"os": "linux", "arch": "amd64"}, + } - # No default_metadata key -> empty dict. + # No provider-owned defaults -> empty dicts. assert resolve_provider_metadata({"opensandbox": {}}) == {} + assert resolve_provider_default_options({"opensandbox": {}}) == {} with pytest.raises(ValueError, match="must be a mapping"): resolve_provider_metadata({"opensandbox": {}, "default_metadata": "not-a-mapping"}) + # Preserve the stack's existing behavior for an empty falsy value. + assert resolve_provider_metadata({"opensandbox": {}, "default_metadata": []}) == {} + + for invalid in ("not-a-mapping", []): + with pytest.raises(ValueError, match="must be a mapping"): + resolve_provider_default_options({"opensandbox": {}, "default_provider_options": invalid}) def test_async_sandbox_transfer_fallback_and_unknown_status(tmp_path: Path) -> None: