From 5f249121e9cb30c27808e968ae641f769ae0b009 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Fri, 26 Jun 2026 09:20:32 -0700 Subject: [PATCH] Split sandbox unit tests in CI Signed-off-by: Hemil Desai --- .github/workflows/full-test-suite.yml | 5 +++-- .github/workflows/unit-tests.yml | 7 +++++-- nemo_gym/cli/main.py | 5 ++++- pyproject.toml | 1 + tests/unit_tests/test_apptainer_provider.py | 3 +++ tests/unit_tests/test_opensandbox_provider.py | 3 +++ tests/unit_tests/test_sandbox.py | 3 +++ 7 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.github/workflows/full-test-suite.yml b/.github/workflows/full-test-suite.yml index 29f8f9a413..22772cc09e 100644 --- a/.github/workflows/full-test-suite.yml +++ b/.github/workflows/full-test-suite.yml @@ -52,12 +52,13 @@ jobs: curl -LsSf https://astral.sh/uv/0.11.19/install.sh | sh uv venv --python 3.12 source .venv/bin/activate - uv sync --extra dev + uv sync --extra dev --extra sandbox - name: Test run: | source .venv/bin/activate - ng_dev_test + PYTEST_ADDOPTS='-m "not sandbox" --cov-report= --cov-fail-under=0' ng_dev_test + pytest --cov=. --cov-append --cov-report=term-missing --durations=10 -m sandbox ng_test_all +fail_on_total_and_test_mismatch=true +delete_venvs_after_each_test=true test-wheel-install: diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 810f75fcc8..ace2e39072 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -166,8 +166,11 @@ jobs: # Full suite: core library unit tests run here once; all server tests run in the # parallel `server-suite` matrix. if [[ "${{ needs.detect.outputs.run_full }}" == "true" ]]; then - echo "Running core library unit tests (server suite runs in the sharded matrix)" - ng_dev_test + echo "Running core library unit tests without sandbox marker (server suite runs in the sharded matrix)" + PYTEST_ADDOPTS='-m "not sandbox" --cov-report= --cov-fail-under=0' ng_dev_test + + echo "Running sandbox unit tests and appending them to the coverage report" + pytest --cov=. --cov-append --cov-report=term-missing --durations=10 -m sandbox # Server-only: test only the changed servers (typically a small set, no sharding needed) elif [[ "${{ needs.detect.outputs.run_servers }}" == "true" ]]; then diff --git a/nemo_gym/cli/main.py b/nemo_gym/cli/main.py index 381975e436..9e74374bc8 100644 --- a/nemo_gym/cli/main.py +++ b/nemo_gym/cli/main.py @@ -43,7 +43,10 @@ class _GymArgumentParser(argparse.ArgumentParser): def error(self, message: str) -> None: match = re.search(r"invalid choice: '([^']+)' \(choose from (.+)\)", message) if match: - typo, choices = match.group(1), re.findall(r"'([^']+)'", match.group(2)) + typo = match.group(1) + choices = re.findall(r"'([^']+)'", match.group(2)) + if not choices: + choices = [choice.strip() for choice in match.group(2).split(",")] message += _did_you_mean(typo, choices) super().error(message) diff --git a/pyproject.toml b/pyproject.toml index 83caf14ff1..61335767ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -490,6 +490,7 @@ markers = [ "system: marks test working at the highest integration level (deselect with '-m \"not system\"')", "acceptance: marks test checking whether the developed product/model passes the user defined acceptance criteria (deselect with '-m \"not acceptance\"')", "docs: mark tests related to documentation (deselect with '-m \"not docs\"')", + "sandbox: marks sandbox unit tests (deselect with '-m \"not sandbox\"')", "skipduringci: marks tests that are skipped during ci as they are addressed by Jenkins jobs but should be run to test user setups", "pleasefixme: marks tests that are broken and need fixing", ] diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py index fea5f8ae82..94de183a6e 100644 --- a/tests/unit_tests/test_apptainer_provider.py +++ b/tests/unit_tests/test_apptainer_provider.py @@ -31,6 +31,9 @@ ) +pytestmark = pytest.mark.sandbox + + FAKE_BINARY = "/usr/bin/apptainer" diff --git a/tests/unit_tests/test_opensandbox_provider.py b/tests/unit_tests/test_opensandbox_provider.py index c57be62a6d..6e1c175ba1 100644 --- a/tests/unit_tests/test_opensandbox_provider.py +++ b/tests/unit_tests/test_opensandbox_provider.py @@ -26,6 +26,9 @@ from nemo_gym.sandbox.providers.base import SandboxResources, SandboxSpec, SandboxStatus +pytestmark = pytest.mark.sandbox + + pytest.importorskip("tenacity", reason="tenacity optional sandbox dependency is not installed") from nemo_gym.sandbox.providers.opensandbox import provider as opensandbox_provider diff --git a/tests/unit_tests/test_sandbox.py b/tests/unit_tests/test_sandbox.py index c26f660b3e..44232e5993 100644 --- a/tests/unit_tests/test_sandbox.py +++ b/tests/unit_tests/test_sandbox.py @@ -42,6 +42,9 @@ from responses_api_agents.mini_swe_agent_2.sandbox_environment import MiniSWESandboxEnvironment +pytestmark = pytest.mark.sandbox + + def _has_module(module_name: str) -> bool: try: return importlib.util.find_spec(module_name) is not None