From c77b31ae2f1b0c6341cfdbbebe80e4ee03c6522b Mon Sep 17 00:00:00 2001
From: yaoyu-33 <yaoyu.094@gmail.com>
Date: Wed, 6 May 2026 19:46:45 -0700
Subject: [PATCH] [ci, build] chore: Revert "refactor(docker): move hybridep
 install from fw_base to Dockerfile.ci (#3702)"

This reverts commit dec6d631f4629888356006f4839dc33fbbb7861f.

L0_Launch_converter::test_generate_vlm regressed from ~1m44s to ~9m
starting at the reverted commit (dec6d631f4). This revert checks whether
baking the DeepEP install (and its env vars HYBRID_EP_MULTINODE /
LD_LIBRARY_PATH) into the CI image is the cause.

Signed-off-by: yaoyu-33 <yaoyu.094@gmail.com>
---
 docker/Dockerfile.ci      | 48 ---------------------------------------
 docker/Dockerfile.fw_base | 48 +++++++++++++++++++++++++++++++++++++++
 docker/README.md          |  6 ++---
 3 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci
index 7ec51d9d71..819c1fb37c 100644
--- a/docker/Dockerfile.ci
+++ b/docker/Dockerfile.ci
@@ -30,54 +30,6 @@ RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-##############################################################################
-##
-## Install DeepEP and nvshmem
-##
-##############################################################################
-
-ARG INSTALL_DEEPEP=True
-ARG DEEPEP_COMMIT=34152ae28f80bcc3ee38d7a12cb2ad87cfd4ea72
-
-ENV HYBRID_EP_MULTINODE=1
-ENV RDMA_CORE_HOME=/opt/rdma-core/build
-ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/:$LD_LIBRARY_PATH
-RUN --mount=type=bind,source=docker/patches/deepep.patch,target=/opt/deepep.patch \
-    if [ "$INSTALL_DEEPEP" = "True" ]; then \
-    # Upgrade system rdma-core to v60; libibverbs-dev supplies the unversioned
-    # libibverbs.so symlink required at link time.
-    apt-get update && apt-get install -y --allow-change-held-packages \
-        rdma-core libibverbs-dev && \
-    apt-get clean && \
-    ARCH_LIB=$(dpkg-architecture -qDEB_HOST_MULTIARCH) && \
-    # libmlx5-dev is not available in the apt sources; create the unversioned
-    # symlink from the versioned one as a fallback for the DeepEP build.
-    test -f /usr/lib/${ARCH_LIB}/libmlx5.so || \
-        ln -sf /usr/lib/${ARCH_LIB}/libmlx5.so.1 /usr/lib/${ARCH_LIB}/libmlx5.so && \
-    # Expose the system install under RDMA_CORE_HOME so DeepEP setup.py finds
-    # $RDMA_CORE_HOME/include and $RDMA_CORE_HOME/lib without a separate source build.
-    mkdir -p ${RDMA_CORE_HOME} && \
-    ln -sfn /usr/include ${RDMA_CORE_HOME}/include && \
-    ln -sfn /usr/lib/${ARCH_LIB} ${RDMA_CORE_HOME}/lib && \
-    git clone https://github.com/deepseek-ai/DeepEP.git && \
-    pushd DeepEP && \
-        git pull && \
-        git fetch origin $DEEPEP_COMMIT && \
-        git checkout FETCH_HEAD && \
-        patch -p1 < /opt/deepep.patch && \
-        pip install --no-cache-dir nvidia-nvshmem-cu13==3.4.5 && \
-        # Create symlink for libnvshmem_host.so (required for DeepEP fabric handle operations)
-        NVSHMEM_LIB_PATH=$(pip show nvidia-nvshmem-cu13 | grep "Location:" | cut -d' ' -f2)/nvidia/nvshmem/lib && \
-        ln -sf ${NVSHMEM_LIB_PATH}/libnvshmem_host.so.3 ${NVSHMEM_LIB_PATH}/libnvshmem_host.so && \
-        apt-get update && \
-        apt-get install -y --no-install-recommends libnvidia-ml-dev && \
-        TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" pip install --no-cache-dir --no-build-isolation -v . && \
-        apt-get purge -y libnvidia-ml-dev && \
-        apt-get autoremove -y && \
-        rm -rf /var/lib/apt/lists/* && \
-    popd; \
-    fi
-
 COPY pyproject.toml uv.lock /opt/Megatron-Bridge/
 COPY src/megatron/bridge/__init__.py src/megatron/bridge/package_info.py /opt/Megatron-Bridge/src/megatron/bridge/
 COPY 3rdparty/Megatron-LM/pyproject.toml 3rdparty/Megatron-LM/setup.py /opt/Megatron-Bridge/3rdparty/Megatron-LM/
diff --git a/docker/Dockerfile.fw_base b/docker/Dockerfile.fw_base
index 7492ee8bc9..b81ad4cdaa 100644
--- a/docker/Dockerfile.fw_base
+++ b/docker/Dockerfile.fw_base
@@ -122,6 +122,54 @@ RUN --mount=type=bind,source=docker/common/install_nsys.sh,target=/opt/install_n
     bash /opt/install_cublas.sh --CUBLAS_VER=$CUBLAS_VERSION; \
     fi
 
+##############################################################################
+##
+## Install DeepEP and nvshmem
+##
+##############################################################################
+
+ARG INSTALL_DEEPEP
+ARG DEEPEP_COMMIT
+
+ENV HYBRID_EP_MULTINODE=1
+ENV RDMA_CORE_HOME=/opt/rdma-core/build
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/:$LD_LIBRARY_PATH
+RUN --mount=type=bind,source=docker/patches/deepep.patch,target=/opt/deepep.patch \
+    if [ "$INSTALL_DEEPEP" = "True" ]; then \
+    # Upgrade system rdma-core to v60; libibverbs-dev supplies the unversioned
+    # libibverbs.so symlink required at link time.
+    apt-get update && apt-get install -y --allow-change-held-packages \
+        rdma-core libibverbs-dev && \
+    apt-get clean && \
+    ARCH_LIB=$(dpkg-architecture -qDEB_HOST_MULTIARCH) && \
+    # libmlx5-dev is not available in the apt sources; create the unversioned
+    # symlink from the versioned one as a fallback for the DeepEP build.
+    test -f /usr/lib/${ARCH_LIB}/libmlx5.so || \
+        ln -sf /usr/lib/${ARCH_LIB}/libmlx5.so.1 /usr/lib/${ARCH_LIB}/libmlx5.so && \
+    # Expose the system install under RDMA_CORE_HOME so DeepEP setup.py finds
+    # $RDMA_CORE_HOME/include and $RDMA_CORE_HOME/lib without a separate source build.
+    mkdir -p ${RDMA_CORE_HOME} && \
+    ln -sfn /usr/include ${RDMA_CORE_HOME}/include && \
+    ln -sfn /usr/lib/${ARCH_LIB} ${RDMA_CORE_HOME}/lib && \
+    git clone https://github.com/deepseek-ai/DeepEP.git && \
+    pushd DeepEP && \
+        git pull && \
+        git fetch origin $DEEPEP_COMMIT && \
+        git checkout FETCH_HEAD && \
+        patch -p1 < /opt/deepep.patch && \
+        pip install --no-cache-dir nvidia-nvshmem-cu13==3.4.5 && \
+        # Create symlink for libnvshmem_host.so (required for DeepEP fabric handle operations)
+        NVSHMEM_LIB_PATH=$(pip show nvidia-nvshmem-cu13 | grep "Location:" | cut -d' ' -f2)/nvidia/nvshmem/lib && \
+        ln -sf ${NVSHMEM_LIB_PATH}/libnvshmem_host.so.3 ${NVSHMEM_LIB_PATH}/libnvshmem_host.so && \
+        apt-get update && \
+        apt-get install -y --no-install-recommends libnvidia-ml-dev && \
+        TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" pip install --no-cache-dir --no-build-isolation -v . && \
+        apt-get purge -y libnvidia-ml-dev && \
+        apt-get autoremove -y && \
+        rm -rf /var/lib/apt/lists/* && \
+    popd; \
+    fi
+
 ##############################################################################
 ##
 ## Install vLLM
diff --git a/docker/README.md b/docker/README.md
index 9dd225b8ef..686eb0d4b7 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -123,6 +123,8 @@ docker build \
 | `NCCL_VER` | NCCL version for the TRT-LLM install scripts |
 | `CUBLAS_VER` | cuBLAS version for the TRT-LLM install scripts |
 | `NVRTC_VER` | NVRTC version for the TRT-LLM install scripts |
+| `INSTALL_DEEPEP` | Set to `True` to build and install DeepEP and nvshmem |
+| `DEEPEP_COMMIT` | DeepEP git commit SHA |
 | `REINSTALL_NSYS` | Set to `True` to reinstall Nsight Systems from the NVIDIA apt repo |
 | `NSYS_VERSION` | Nsight Systems version (e.g. `2026.1.0.1085`) |
 | `REINSTALL_CUDNN` | Set to `True` to reinstall cuDNN from the NVIDIA apt repo |
@@ -137,8 +139,6 @@ docker build \
 | Argument | Description |
 |---|---|
 | `BASE_IMAGE` | Base container; set to the fw-base image when building the full stack |
-| `INSTALL_DEEPEP` | Set to `True` to build and install DeepEP and nvshmem |
-| `DEEPEP_COMMIT` | DeepEP git commit SHA |
 | `MCORE_TRIGGERED_TESTING` | Skip uv lockfile check for cross-version Megatron-LM testing |
 | `UV_CACHE_PRUNE_ARGS` | Extra arguments for `uv cache prune` |
 
@@ -163,5 +163,5 @@ docker build \
 | `common/install_nccl.sh` | Reinstall NCCL from the public NVIDIA CUDA apt repo |
 | `common/install_cudnn.sh` | Reinstall cuDNN from the public NVIDIA CUDA apt repo |
 | `common/install_nsys.sh` | Reinstall Nsight Systems from the public NVIDIA CUDA apt repo |
-| `patches/deepep.patch` | Patch applied to DeepEP during CI image build |
+| `patches/deepep.patch` | Patch applied to DeepEP during fw-base build |
 | `patches/vllm.patch` | Patch applied to vLLM after install in fw-base |