Skip to content

Commit

Permalink
wait for mofed driver installation after the cleanup driver call
Browse files (browse the repository at this point in the history)
Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
  • Loading branch information
tariq1890 committed Feb 21, 2024
1 parent 1461ff9 commit d64af46
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions driver-manager
Original file line number Diff line number Diff line change
Expand Up @@ -571,12 +571,6 @@ _wait_for_mofed_driver() {
}

uninstall_driver() {
# when GPUDirectRDMA is enabled, wait until MOFED driver has finished installing
if _gpu_direct_rdma_enabled; then
echo "GPUDirectRDMA is enabled, validating MOFED driver installation"
_wait_for_mofed_driver
fi

# don't attempt to un-install if driver is pre-installed on the node
if _host_driver; then
echo "NVIDIA GPU driver is already pre-installed on the node, disabling the containerized driver on the node"
Expand Down Expand Up @@ -645,6 +639,12 @@ uninstall_driver() {
_exit_failed
fi

# when GPUDirectRDMA is enabled, wait until MOFED driver has finished installing
if _gpu_direct_rdma_enabled; then
echo "GPUDirectRDMA is enabled, validating MOFED driver installation"
_wait_for_mofed_driver
fi

if _is_gpu_pod_eviction_enabled || _is_auto_drain_enabled; then
# uncordon the node in case the pod restarted abruptly after we cordoned the node
_uncordon_k8s_node
Expand Down

0 comments on commit d64af46

Please sign in to comment.