NVIDIA-NeMo · terrykong · Nov 9, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
@@ -204,7 +204,7 @@ jobs:
       image-name: nemo_rl_container
       dockerfile: docker/Dockerfile
       image-label: nemo-rl
-      target: hermetic
+      target: release
       build-contexts: |
         nemo-rl=${{ github.run_id }}/
       build-args: |

@@ -1,3 +1,4 @@
+# syntax=docker/dockerfile:1
 # Usage:
 # Self-contained build (default: builds from main): docker buildx build -f docker/Dockerfile --tag <registry>/nemo-rl:latest --push .
 # Self-contained build (specific git ref): docker buildx build -f docker/Dockerfile --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push .
@@ -10,6 +11,8 @@ ARG NRL_GIT_REF=main
 ADD --keep-git-dir=true https://github.com/NVIDIA-NeMo/RL.git#${NRL_GIT_REF} /
 
 FROM ${BASE_IMAGE} AS base
+# An environment variable to indicate that we are in a container.
+ENV NRL_CONTAINER=1
 
 # It is more convenient for users to run as root
 USER root
@@ -76,10 +79,13 @@ ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"
 
 # First copy only the dependency files
 COPY --from=nemo-rl pyproject.toml uv.lock ./
+# Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist.
+COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/
 COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh
 COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/
 
 RUN <<"EOF" bash -exu
+uv venv --seed
 if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then
     bash tools/build-custom-vllm.sh
     source 3rdparty/vllm/nemo-rl.env
@@ -117,10 +123,15 @@ LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}"
 ENV NEMO_RL_VENV_DIR=/opt/ray_venvs
 
 # Copy in source from build context (defaults to cloned repo, can be overridden)
-COPY --from=nemo-rl . /opt/nemo-rl
+# Exclude pyproject.toml and uv.lock since those may be altered by build-custom-vllm.sh
+COPY --from=nemo-rl --exclude=pyproject.toml --exclude=uv.lock . /opt/nemo-rl
 # Unshallow the repo to get the full history (in the case it was from the scratch layer).
 # Potentially not necessary if the repo is passed in as a complete repository (w/ full git history),
 # so do a quick check before trying to unshallow.
 RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true
 RUN UV_LINK_MODE=symlink uv run nemo_rl/utils/prefetch_venvs.py
 
+# Generate container fingerprint for frozen environment support
+# Store outside /opt/nemo-rl to avoid being overwritten by user mounts
+RUN python tools/generate_fingerprint.py > /opt/nemo_rl_container_fingerprint
+
@@ -66,6 +66,7 @@
     "tasklist",  # Adds support for GitHub-style task lists with [ ] and [x]
 ]
 myst_heading_anchors = 5  # Generates anchor links for headings up to level 5
+myst_fence_as_directive = ["mermaid"]  # Treat ```mermaid blocks as directives
 
 # -- Options for Autodoc2 ---------------------------------------------------
 sys.path.insert(0, os.path.abspath(".."))

@@ -242,6 +242,7 @@ design-docs/design-and-philosophy.md
 design-docs/padding.md
 design-docs/logger.md
 design-docs/uv.md
+design-docs/dependency-management.md
 design-docs/chat-datasets.md
 design-docs/generation.md
 design-docs/checkpointing.md

@@ -16,6 +16,12 @@
 import sys
 from pathlib import Path
 
+# Configure logging to show file location for warnings.
+# NOTE(review): this call runs at import time and targets the root logger. Per the
+# logging docs it is a no-op when the root logger already has handlers; otherwise
+# the WARNING level means INFO/DEBUG records sent to the root logger are dropped
+# unless logging is reconfigured elsewhere. Confirm this is intended for a library.
+logging.basicConfig(
+    format="%(levelname)s:%(name)s:%(filename)s:%(lineno)d: %(message)s",
+    level=logging.WARNING,
+)
+
 """
 This is a work around to ensure whenever NeMo RL is imported, that we
 add Megatron-LM to the python path. This is because the only sub-package
@@ -49,6 +55,159 @@
 os.environ["RAY_ENABLE_UV_RUN_RUNTIME_ENV"] = "0"
 
 
+def _is_build_isolation():
+    """Detect if we're running in a uv build isolation environment.
+
+    When running uv lock/sync, uv creates a temporary isolated environment
+    in ~/.cache/uv/builds-v*/ to build packages and introspect metadata.
+    We skip the fingerprint check in this context since the user is updating dependencies.
+
+    Returns True if in build isolation, False otherwise.
+    """
+    # Check if we're in uv's build isolation directory
+    # uv always uses paths like: /root/.cache/uv/builds-v0/.tmp*/
+    return "/builds-v" in sys.prefix
+
+
+def _check_container_fingerprint():
+    """Check if container dependencies match the current code (container-only).
+
+    This check only runs when NRL_CONTAINER=1 is set (inside containers).
+    It compares the container's fingerprint (computed at build time) with
+    the current code's fingerprint to detect dependency drift.
+
+    This check is also skipped entirely if NRL_FORCE_REBUILD_VENVS=true is set,
+    since environment rebuilding will ensure dependencies are consistent regardless
+    of a mismatch.
+
+    If there's a mismatch, raises RuntimeError unless NRL_IGNORE_VERSION_MISMATCH is set.
+    """
+    # Skip check if not in container or if we're going to force venv rebuild anyway
+    if not os.environ.get("NRL_CONTAINER"):
+        return
+    if os.environ.get("NRL_FORCE_REBUILD_VENVS", "").lower() == "true":
+        logging.info(
+            "Skipping container fingerprint check because NRL_FORCE_REBUILD_VENVS=true (venvs will be rebuilt anyway)"
+        )
+        return
+
+    # Skip check if we're in a build isolation environment (e.g., during uv lock/sync)
+    if _is_build_isolation():
+        logging.debug(
+            "Skipping container fingerprint check because we're in a build isolation environment"
+        )
+        return
+
+    try:
+        import json
+        import runpy
+        import sys
+        from io import StringIO
+
+        # Get repo root (relative to this module)
+        repo_root = Path(__file__).parent.parent
+        fingerprint_script = repo_root / "tools" / "generate_fingerprint.py"
+
+        # Check if script exists
+        if not fingerprint_script.exists():
+            logging.warning(
+                f"Fingerprint script not found at {fingerprint_script}, skipping version check"
+            )
+            return
+
+        # Compute current code fingerprint using runpy (cleaner than subprocess)
+        old_stdout = sys.stdout
+        sys.stdout = captured_output = StringIO()
+        try:
+            runpy.run_path(str(fingerprint_script), run_name="__main__")
+            current_fingerprint_json = captured_output.getvalue().strip()
+        finally:
+            sys.stdout = old_stdout
+
+        if not current_fingerprint_json:
+            logging.warning("Failed to compute code fingerprint: empty output")
+            return
+
+        current_fingerprint = json.loads(current_fingerprint_json)
+
+        # Read container fingerprint
+        container_fingerprint_file = Path("/opt/nemo_rl_container_fingerprint")
+        if not container_fingerprint_file.exists():
+            logging.warning(
+                "Container fingerprint file not found, skipping version check"
+            )
+            return
+
+        container_fingerprint = json.loads(
+            container_fingerprint_file.read_text().strip()
+        )
+
+        # Compare fingerprints and find differences
+        all_keys = set(current_fingerprint.keys()) | set(container_fingerprint.keys())
+        differences = []
+
+        for key in sorted(all_keys):
+            current_val = current_fingerprint.get(key, "missing")
+            container_val = container_fingerprint.get(key, "missing")
+
+            if current_val != container_val:
+                differences.append(f"  - {key}:")
+                differences.append(f"      Container: {container_val}")
+                differences.append(f"      Current:   {current_val}")
+
+        if differences:
+            diff_text = "\n".join(differences)
+            sep_line = "\n" + ("-" * 80)
+            warning_msg = (
+                f"{sep_line}\n"
+                "WARNING: Container/Code Version Mismatch Detected!\n"
+                f"{sep_line}\n"
+                "Your container's dependencies do not match your current code.\n"
+                "\n"
+                "Differences found:\n"
+                f"{diff_text}\n"
+                "\n"
+                "This can lead to unexpected behavior or errors.\n"
+                "\n"
+                "Solutions:\n"
+                "  1. Rebuild the container to match your code\n"
+                "  2. Set NRL_FORCE_REBUILD_VENVS=true to rebuild virtual environments\n"
+                "     (This forces Ray workers to recreate their venvs with updated dependencies)\n"
+                "  3. Update the container fingerprint to match your current code (for local dev):\n"
+                "     python tools/generate_fingerprint.py > /opt/nemo_rl_container_fingerprint\n"
+                "  4. Set NRL_IGNORE_VERSION_MISMATCH=1 to bypass this check (not recommended)\n"
+                "\n"
+                "Learn more about dependency management:\n"
+                "  https://github.com/NVIDIA-NeMo/RL/blob/main/docs/design-docs/dependency-management.md\n"
+                f"{sep_line}\n"
+            )
+
+            # Check if user wants to ignore the mismatch
+            if os.environ.get("NRL_IGNORE_VERSION_MISMATCH"):
+                logging.warning(
+                    warning_msg
+                    + "Proceeding anyway (NRL_IGNORE_VERSION_MISMATCH is set)..."
+                )
+            else:
+                raise RuntimeError(
+                    warning_msg
+                    + "To proceed anyway, set: export NRL_IGNORE_VERSION_MISMATCH=1"
+                )
+        else:
+            logging.debug("Container fingerprint matches code fingerprint")
+
+    except RuntimeError:
+        # Re-raise RuntimeError for version mismatches (user should see this)
+        raise
+    except Exception as e:
+        # Log other errors but don't crash on version check failures
+        logging.debug(f"Version check failed (non-fatal): {e}")
+
+
+# Perform container version check at import time. A fingerprint mismatch raises
+# RuntimeError here unless NRL_IGNORE_VERSION_MISMATCH is set.
+_check_container_fingerprint()
+
+
 def _patch_nsight_file():
     """Patch the nsight.py file to fix the context.py_executable assignment.
 

@@ -11,7 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import sys
+from pathlib import Path
 
 from nemo_rl.distributed.ray_actor_environment_registry import (
     ACTOR_ENVIRONMENT_REGISTRY,
@@ -52,6 +54,100 @@ def prefetch_venvs():
 
     print("\nVenv prefetching complete!")
 
+    # Create convenience python wrapper scripts for frozen environment support (container-only)
+    create_frozen_environment_symlinks(venv_configs)
+
+
+def create_frozen_environment_symlinks(venv_configs):
+    """Create python-{ClassName} wrapper scripts in /usr/local/bin for frozen environment support.
+
+    Only runs in container (when NRL_CONTAINER=1 is set).
+
+    Args:
+        venv_configs: Dictionary mapping py_executable to list of actor FQNs
+    """
+    # Only create wrapper scripts in container
+    if not os.environ.get("NRL_CONTAINER"):
+        print(
+            "\nSkipping frozen environment wrapper script creation (not in container)"
+        )
+        return
+
+    print("\nCreating frozen environment wrapper scripts...")
+
+    # Collect all wrapper mappings: class_name -> venv_path
+    wrapper_mappings = {}
+
+    for py_executable, actor_fqns in venv_configs.items():
+        for actor_fqn in actor_fqns:
+            # Extract class name from FQN (last part)
+            # e.g., "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker" -> "MegatronPolicyWorker"
+            class_name = actor_fqn.split(".")[-1]
+
+            # Get the venv path that was created
+            try:
+                python_path = create_local_venv(py_executable, actor_fqn)
+
+                # Check for collisions
+                if class_name in wrapper_mappings:
+                    existing_path = wrapper_mappings[class_name]
+                    if existing_path != python_path:
+                        raise RuntimeError(
+                            f"Collision detected: Multiple venvs want to use name '{class_name}'\n"
+                            f"  Existing: {existing_path}\n"
+                            f"  New: {python_path}\n"
+                            f"This indicates two different worker classes have the same name."
+                        )
+                else:
+                    wrapper_mappings[class_name] = python_path
+            except Exception as e:
+                print(f"  Warning: Could not get venv path for {actor_fqn}: {e}")
+                continue
+
+    # Create wrapper scripts
+    wrapper_dir = Path("/usr/local/bin")
+    created_wrappers = []
+
+    for class_name, python_path in sorted(wrapper_mappings.items()):
+        wrapper_name = f"python-{class_name}"
+        wrapper_path = wrapper_dir / wrapper_name
+
+        # Get the venv directory path (parent of bin/python)
+        venv_path = Path(python_path).parent.parent
+
+        # Create wrapper script content
+        wrapper_content = f"""#!/bin/bash
+VENV_PATH="{venv_path}"
+export VIRTUAL_ENV="$VENV_PATH"
+export PATH="$VENV_PATH/bin:$PATH"
+exec "$VENV_PATH/bin/python" "$@"
+"""
+
+        try:
+            # Remove existing wrapper if present
+            if wrapper_path.exists() or wrapper_path.is_symlink():
+                wrapper_path.unlink()
+
+            # Write wrapper script
+            wrapper_path.write_text(wrapper_content)
+
+            # Make executable
+            wrapper_path.chmod(0o755)
+
+            created_wrappers.append(wrapper_name)
+            print(f"  Created: {wrapper_name} -> {python_path}")
+        except Exception as e:
+            print(f"  Warning: Could not create wrapper script {wrapper_name}: {e}")
+            continue
+
+    if created_wrappers:
+        print(f"\nCreated {len(created_wrappers)} frozen environment wrapper scripts")
+        print("Users can now use these python executables directly:")
+        for name in created_wrappers:
+            print(f"  - {name}")
+    else:
+        print("\nNo frozen environment wrapper scripts were created")
+
 
 if __name__ == "__main__":
     prefetch_venvs()
@@ -17,6 +17,7 @@ requires-python = ">=3.12"
 license = { text = "Apache 2.0" }
 dependencies = [
   "setuptools",
+  "pip",                                                                                                              # Required for frozen environments; uv venv --seed may not reliably install pip
   "ninja",                                                                                                            # for flash-attn parallel build
   "torch==2.8.0",
   "triton; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')",

@@ -19,6 +19,10 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
 
 cd ${PROJECT_ROOT}
+# These tests are intentionally not run with uv run --no-sync to verify that the frozen environment is working correctly.
+time bash ./tests/functional/grpo_frozen_env.sh
+time bash ./tests/functional/test_frozen_env.sh
+
 time uv run --no-sync bash ./tests/functional/sft.sh
 time uv run --no-sync bash ./tests/functional/grpo.sh
 time uv run --no-sync bash ./tests/functional/grpo_async.sh