diff --git a/.github/workflows/full-test-suite.yml b/.github/workflows/full-test-suite.yml index 29f8f9a41..22772cc09 100644 --- a/.github/workflows/full-test-suite.yml +++ b/.github/workflows/full-test-suite.yml @@ -52,12 +52,13 @@ jobs: curl -LsSf https://astral.sh/uv/0.11.19/install.sh | sh uv venv --python 3.12 source .venv/bin/activate - uv sync --extra dev + uv sync --extra dev --extra sandbox - name: Test run: | source .venv/bin/activate - ng_dev_test + PYTEST_ADDOPTS='-m "not sandbox" --cov-report= --cov-fail-under=0' ng_dev_test + pytest --cov=. --cov-append --cov-report=term-missing --durations=10 -m sandbox ng_test_all +fail_on_total_and_test_mismatch=true +delete_venvs_after_each_test=true test-wheel-install: diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 810f75fcc..ace2e3907 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -166,8 +166,11 @@ jobs: # Full suite: core library unit tests run here once; all server tests run in the # parallel `server-suite` matrix. if [[ "${{ needs.detect.outputs.run_full }}" == "true" ]]; then - echo "Running core library unit tests (server suite runs in the sharded matrix)" - ng_dev_test + echo "Running core library unit tests without sandbox marker (server suite runs in the sharded matrix)" + PYTEST_ADDOPTS='-m "not sandbox" --cov-report= --cov-fail-under=0' ng_dev_test + + echo "Running sandbox unit tests and appending them to the coverage report" + pytest --cov=. --cov-append --cov-report=term-missing --durations=10 -m sandbox # Server-only: test only the changed servers (typically a small set, no sharding needed) elif [[ "${{ needs.detect.outputs.run_servers }}" == "true" ]]; then diff --git a/nemo_gym/cli/main.py b/nemo_gym/cli/main.py index 381975e43..9e74374bc 100644 --- a/nemo_gym/cli/main.py +++ b/nemo_gym/cli/main.py @@ -43,7 +43,10 @@ class _GymArgumentParser(argparse.ArgumentParser): def error(self, message: str) -> None: match = re.search(r"invalid choice: '([^']+)' \(choose from (.+)\)", message) if match: - typo, choices = match.group(1), re.findall(r"'([^']+)'", match.group(2)) + typo = match.group(1) + choices = re.findall(r"'([^']+)'", match.group(2)) + if not choices: + choices = [choice.strip() for choice in match.group(2).split(",")] message += _did_you_mean(typo, choices) super().error(message) diff --git a/pyproject.toml b/pyproject.toml index 83caf14ff..61335767e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -490,6 +490,7 @@ markers = [ "system: marks test working at the highest integration level (deselect with '-m \"not system\"')", "acceptance: marks test checking whether the developed product/model passes the user defined acceptance criteria (deselect with '-m \"not acceptance\"')", "docs: mark tests related to documentation (deselect with '-m \"not docs\"')", + "sandbox: marks sandbox unit tests (deselect with '-m \"not sandbox\"')", "skipduringci: marks tests that are skipped during ci as they are addressed by Jenkins jobs but should be run to test user setups", "pleasefixme: marks tests that are broken and need fixing", ] diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py index fea5f8ae8..94de183a6 100644 --- a/tests/unit_tests/test_apptainer_provider.py +++ b/tests/unit_tests/test_apptainer_provider.py @@ -31,6 +31,9 @@ ) +pytestmark = pytest.mark.sandbox + + FAKE_BINARY = "/usr/bin/apptainer" diff --git a/tests/unit_tests/test_opensandbox_provider.py b/tests/unit_tests/test_opensandbox_provider.py index c57be62a6..6e1c175ba 100644 --- a/tests/unit_tests/test_opensandbox_provider.py +++ b/tests/unit_tests/test_opensandbox_provider.py @@ -26,6 +26,9 @@ from nemo_gym.sandbox.providers.base import SandboxResources, SandboxSpec, SandboxStatus +pytestmark = pytest.mark.sandbox + + pytest.importorskip("tenacity", reason="tenacity optional sandbox dependency is not installed") from nemo_gym.sandbox.providers.opensandbox import provider as opensandbox_provider diff --git a/tests/unit_tests/test_sandbox.py b/tests/unit_tests/test_sandbox.py index c26f660b3..44232e599 100644 --- a/tests/unit_tests/test_sandbox.py +++ b/tests/unit_tests/test_sandbox.py @@ -42,6 +42,9 @@ from responses_api_agents.mini_swe_agent_2.sandbox_environment import MiniSWESandboxEnvironment +pytestmark = pytest.mark.sandbox + + def _has_module(module_name: str) -> bool: try: return importlib.util.find_spec(module_name) is not None