NVIDIA-NeMo · snowmanwwg · Jun 17, 2026 · Jun 25, 2026 · Jun 20, 2026 · Jun 26, 2026
@@ -465,6 +465,7 @@ def _spinup_nemo_gym(base_urls, model_name):
             nemo_gym_py_exec = create_local_venv_on_each_node(
                 nemo_gym_py_exec, "nemo_rl.environments.nemo_gym.NemoGym"
             )
+        nemo_gym_venv = os.path.dirname(os.path.dirname(nemo_gym_py_exec))
         nemo_gym_dict = env_configs["nemo_gym"]
         # NeMo-RL-side detection knobs are top-level NemoGymConfig fields
         # (where the detector reads them), not part of Gym's global config.
@@ -497,8 +498,8 @@ def _spinup_nemo_gym(base_urls, model_name):
             "py_executable": nemo_gym_py_exec,
             "env_vars": {
                 **os.environ,
-                "VIRTUAL_ENV": nemo_gym_py_exec,
-                "UV_PROJECT_ENVIRONMENT": nemo_gym_py_exec,
+                "VIRTUAL_ENV": nemo_gym_venv,
+                "UV_PROJECT_ENVIRONMENT": nemo_gym_venv,
             },
         }
         actor = NemoGym.options(**nemo_gym_opts).remote(nemo_gym_cfg)

@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import subprocess
+import sys
 from pathlib import Path
 from typing import Any, Dict, List, NotRequired, TypedDict
 
@@ -38,6 +39,30 @@
 DEFAULT_THINKING_TAGS = ["<think>", "</think>"]
 
 
+def _ensure_nemo_gym_package_precedence() -> None:
+    """Prefer the third-party NeMo-Gym package over examples/nemo_gym."""
+    repo_root = Path(__file__).resolve().parents[2]
+    gym_workspace = repo_root / "3rdparty" / "Gym-workspace" / "Gym"
+    gym_init = gym_workspace / "nemo_gym" / "__init__.py"
+    if not gym_init.exists():
+        return
+
+    gym_workspace_str = str(gym_workspace)
+    if sys.path[:1] != [gym_workspace_str]:
+        sys.path[:] = [p for p in sys.path if p != gym_workspace_str]
+        sys.path.insert(0, gym_workspace_str)
+
+    shadowed_module = sys.modules.get("nemo_gym")
+    if (
+        shadowed_module is not None
+        and getattr(shadowed_module, "__file__", None) is None
+        and not hasattr(shadowed_module, "PARENT_DIR")
+    ):
+        for module_name in list(sys.modules):
+            if module_name == "nemo_gym" or module_name.startswith("nemo_gym."):
+                del sys.modules[module_name]
+
+
 def get_nemo_gym_uv_cache_dir() -> str | None:
     """Return the uv cache directory inside a container, or None outside one.
 
@@ -158,6 +183,8 @@ def _spinup(self) -> None:
         _gym_port_high = self.cfg.get("port_range_high", DEFAULT_GYM_PORT_RANGE_HIGH)
         self.head_server_port = _get_free_port_local(_gym_port_low, _gym_port_high)
 
+        _ensure_nemo_gym_package_precedence()
+
         from nemo_gym.cli import GlobalConfigDictParserConfig, RunHelper
         from nemo_gym.rollout_collection import RolloutCollectionHelper
         from nemo_gym.server_utils import HEAD_SERVER_KEY_NAME, BaseServerConfig

@@ -722,15 +722,22 @@ async def create_chat_completion(
                 generator = await openai_serving_chat.create_chat_completion(
                     request, raw_request
                 )
-            except VLLMValidationError as e:
+            except (ValueError, VLLMValidationError) as e:
                 # vLLM 0.20 raises VLLMValidationError for prompts exceeding
                 # max_model_len during tokenization, instead of returning an
-                # ErrorResponse. Convert to HTTP 400 so the Gym proxy can
-                # detect context-length overflow and handle it gracefully.
+                # ErrorResponse. Our post-tokenization clamp can raise a local
+                # ValueError for the same condition after prefix replacement.
+                # Convert those cases to HTTP 400 so the Gym proxy can detect
+                # context-length overflow and handle it gracefully.
+                message = str(e)
+                if isinstance(e, ValueError) and not (
+                    "max_model_len" in message or "maximum context length" in message
+                ):
+                    raise
-                    raise
+                if not isinstance(e, VLLMValidationError) and not (
+                    "max_model_len" in message or "maximum context length" in message
+                ):
+                    raise
-                    raise
+                if not isinstance(e, VLLMValidationError) and not (
+                    "max_model_len" in message or "maximum context length" in message
+                ):
+                    raise
                 return JSONResponse(
                     content={
                         "error": {
-                            "message": str(e),
+                            "message": message,
                             "type": "invalid_request_error",
                             "code": 400,
                         }

@@ -525,7 +525,12 @@ echo "All workers connected!"
 # This driver process is responsible for launching a job on the Ray cluster
 CONTAINER_CWD=$(scontrol show job $SLURM_JOB_ID | grep -oP 'WorkDir=\K[^ ]+' | head -1)
 if [[ -n "$COMMAND" ]]; then
+  set +e
   srun --no-container-mount-home --overlap --container-name=ray-head --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "$head_node" -o $LOG_DIR/ray-driver.log bash -c "$COMMAND"
+  driver_exit_code=$?
+  set -e
+  touch "$LOG_DIR/ENDED"
+  exit "$driver_exit_code"
 else
   echo "[INFO]: Ray Cluster is idled, run this on the slurm head node to get a shell to the head node:"
   cat <<EOF >$SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh