From 61b24aba221531fb977bb66ce399fac006e58530 Mon Sep 17 00:00:00 2001 From: Vedaanta Agarwalla <142048820+vedaanta-nvidia@users.noreply.github.com> Date: Tue, 19 Mar 2024 23:29:19 -0700 Subject: [PATCH 1/3] bumps cudnnex to 1.2.1 --- .azure/gpu-tests.yml | 8 ++++---- dockers/ubuntu-cuda/Dockerfile | 2 +- docs/source/fundamentals/installation.rst | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index 22ba01eddc..8ea5f5ec22 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -17,17 +17,17 @@ jobs: matrix: # CUDA 12.1 'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.2 | regular': - docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.1.0-py3.10-pt_2.2.1' + docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.2.1' CUDA_VERSION_MM: '121' 'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.2 | distributed': - docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.1.0-py3.10-pt_2.2.1' + docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.2.1' CUDA_VERSION_MM: '121' testing: 'distributed' 'ubuntu22.04 | cuda 12.1 | python 3.10 | torch-nightly | regular': - docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.2.0-py3.10-pt_main' + docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main' CUDA_VERSION_MM: '121' 'ubuntu22.04 | cuda 12.1 | python 3.10 | torch-nightly | distributed': - docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.2.0-py3.10-pt_main' + docker-image: 'pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main' CUDA_VERSION_MM: '121' testing: 'distributed' # how long to run the job before automatically cancelling diff --git a/dockers/ubuntu-cuda/Dockerfile b/dockers/ubuntu-cuda/Dockerfile index e815d827f6..d4ccc4b878 100644 --- a/dockers/ubuntu-cuda/Dockerfile +++ b/dockers/ubuntu-cuda/Dockerfile @@ -20,7 +20,7 @@ ARG IMAGE_TYPE="devel" FROM nvidia/cuda:${CUDA_VERSION}-${IMAGE_TYPE}-ubuntu${UBUNTU_VERSION} ARG CUDNN_VERSION="8.9.7.29-1" -ARG CUDNN_FRONTEND_CHECKOUT="v1.1.0" +ARG CUDNN_FRONTEND_CHECKOUT="v1.2.1" ARG PYTHON_VERSION="3.10" ARG TORCH_VERSION="2.2.1" ARG TRITON_VERSION="2.2.0" diff --git a/docs/source/fundamentals/installation.rst b/docs/source/fundamentals/installation.rst index 8d41a24047..a426f947b6 100644 --- a/docs/source/fundamentals/installation.rst +++ b/docs/source/fundamentals/installation.rst @@ -39,7 +39,7 @@ Thunder can use NVIDIA's cuDNN Python frontend bindings to accelerate some PyTor export CUDNN_PATH=/usr/local/lib/python3.10/dist-packages/nvidia/cudnn/ for file in $CUDNN_PATH/lib/*.so.[0-9]; do filename_without_version="${file%??}"; ln -s $file $filename_without_version; done - git clone -b v1.1.0 https://github.com/NVIDIA/cudnn-frontend.git + git clone -b v1.2.1 https://github.com/NVIDIA/cudnn-frontend.git export CUDAToolkit_ROOT=/path/to/cuda CMAKE_BUILD_PARALLEL_LEVEL=16 pip install cudnn_frontend/ -v From b3d93ba9a685d0fde69b20bfe4e99833ba0cc7a0 Mon Sep 17 00:00:00 2001 From: Vedaanta Agarwalla Date: Thu, 21 Mar 2024 11:02:44 -0700 Subject: [PATCH 2/3] bumps cudnn FE in docker build jobs --- .azure/docker-build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.azure/docker-build.yml b/.azure/docker-build.yml index 73233ae78c..7ebc7e1288 100644 --- a/.azure/docker-build.yml +++ b/.azure/docker-build.yml @@ -41,13 +41,13 @@ jobs: matrix: # CUDA 12.1 'cuda 12.1 | torch 2.2 | cudnn FE v1.1': # todo: drop updating this image when CI transition to newer FE version - {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.1.0"} + {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.2.1"} 'cuda 12.1 | torch 2.2 | cudnn FE v1.2': - {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.2.0"} + {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.2.1"} 'cuda 12.1 | torch 2.3 /nightly | cudnn FE v1.1': # todo: drop updating this image when CI transition to newer FE version - {CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.1.0"} + {CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.2.1"} 'cuda 12.1 | torch 2.3 /nightly | cudnn FE v1.2': - {CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.2.0"} + {CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.2.1"} #'cuda 12.1': # this version - '8.9.5.29-1+cuda12.1' for 'libcudnn8' was not found # how much time to give 'run always even if cancelled tasks' before stopping them cancelTimeoutInMinutes: "2" From b376caec883e900b31a40635e654f725c3bead37 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 Mar 2024 15:17:32 +0100 Subject: [PATCH 3/3] prune --- .azure/docker-build.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.azure/docker-build.yml b/.azure/docker-build.yml index 7ebc7e1288..bbef960124 100644 --- a/.azure/docker-build.yml +++ b/.azure/docker-build.yml @@ -40,12 +40,8 @@ jobs: #maxParallel: "3" matrix: # CUDA 12.1 - 'cuda 12.1 | torch 2.2 | cudnn FE v1.1': # todo: drop updating this image when CI transition to newer FE version - {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.2.1"} 'cuda 12.1 | torch 2.2 | cudnn FE v1.2': {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.2.1"} - 'cuda 12.1 | torch 2.3 /nightly | cudnn FE v1.1': # todo: drop updating this image when CI transition to newer FE version - {CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.2.1"} 'cuda 12.1 | torch 2.3 /nightly | cudnn FE v1.2': {CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.2.1"} #'cuda 12.1': # this version - '8.9.5.29-1+cuda12.1' for 'libcudnn8' was not found