From f0b5ab3fad08bdbc250a895fd2151c4c63abe512 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Tue, 16 Sep 2025 00:43:42 -0700 Subject: [PATCH 01/84] Adding softlink with MajorVersion number for pciutils --- repos/spack_repo/builtin/packages/pciutils/package.py | 6 ++++++ .../builtin/packages/rocm_validation_suite/package.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/pciutils/package.py b/repos/spack_repo/builtin/packages/pciutils/package.py index 4f3e5eaf893..5b499e0bc9f 100644 --- a/repos/spack_repo/builtin/packages/pciutils/package.py +++ b/repos/spack_repo/builtin/packages/pciutils/package.py @@ -46,6 +46,12 @@ def install(self, spec, prefix): if os.path.islink(symlink_path) or os.path.exists(symlink_path): os.remove(symlink_path) os.symlink(os.path.basename(so_candidates[0]), symlink_path) + major_version = str(self.version).split('.')[0] + libname = f"libpci.so.{major_version}" + symlink_path = os.path.join(lib_dir, libname) + if os.path.islink(symlink_path) or os.path.exists(symlink_path): + os.remove(symlink_path) + os.symlink(os.path.basename(so_candidates[0]), symlink_path) else: make("install", "PREFIX={0}".format(prefix)) diff --git a/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py b/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py index 4ff94d90853..51393b63a60 100644 --- a/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py +++ b/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py @@ -66,7 +66,7 @@ class RocmValidationSuite(CMakePackage): depends_on("googletest") depends_on("doxygen", type="build") depends_on("libdrm", when="@6.4:") - depends_on("pciutils+shared", type="build", when="@6.4:") + depends_on("pciutils+shared", when="@6.4:") def setup_build_environment(self, env: EnvironmentModifications) -> None: spec = self.spec From 2ff1033b05596832075ccbcf66cf443b6afb8b25 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Tue, 16 Sep 2025 00:46:48 -0700 Subject: [PATCH 02/84] Revert "Adding softlink with MajorVersion number for pciutils" This reverts commit f0b5ab3fad08bdbc250a895fd2151c4c63abe512. --- repos/spack_repo/builtin/packages/pciutils/package.py | 6 ------ .../builtin/packages/rocm_validation_suite/package.py | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/repos/spack_repo/builtin/packages/pciutils/package.py b/repos/spack_repo/builtin/packages/pciutils/package.py index 5b499e0bc9f..4f3e5eaf893 100644 --- a/repos/spack_repo/builtin/packages/pciutils/package.py +++ b/repos/spack_repo/builtin/packages/pciutils/package.py @@ -46,12 +46,6 @@ def install(self, spec, prefix): if os.path.islink(symlink_path) or os.path.exists(symlink_path): os.remove(symlink_path) os.symlink(os.path.basename(so_candidates[0]), symlink_path) - major_version = str(self.version).split('.')[0] - libname = f"libpci.so.{major_version}" - symlink_path = os.path.join(lib_dir, libname) - if os.path.islink(symlink_path) or os.path.exists(symlink_path): - os.remove(symlink_path) - os.symlink(os.path.basename(so_candidates[0]), symlink_path) else: make("install", "PREFIX={0}".format(prefix)) diff --git a/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py b/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py index 51393b63a60..4ff94d90853 100644 --- a/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py +++ b/repos/spack_repo/builtin/packages/rocm_validation_suite/package.py @@ -66,7 +66,7 @@ class RocmValidationSuite(CMakePackage): depends_on("googletest") depends_on("doxygen", type="build") depends_on("libdrm", when="@6.4:") - depends_on("pciutils+shared", when="@6.4:") + depends_on("pciutils+shared", type="build", when="@6.4:") def setup_build_environment(self, env: EnvironmentModifications) -> None: spec = self.spec From d7233529f2bf78aa8e66d139f35fbd6518a2cb74 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 19 Sep 2025 00:41:26 -0700 Subject: [PATCH 03/84] Enabling ci build for py-torch in rocm --- .../builtin/packages/aotriton/package.py | 3 ++ .../builtin/packages/py_torch/package.py | 24 +++++++-- stacks/ml-linux-x86_64-rocm/spack.yaml | 49 +++++++++---------- 3 files changed, 47 insertions(+), 29 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 068cdb1877c..516b5ba679f 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -48,6 +48,9 @@ class Aotriton(CMakePackage): depends_on("pkgconfig", type="build") conflicts("^openssl@3.3.0") + # https://github.com/ROCm/aotriton/blob/main/README.md?plain=1#L24 + conflicts("%gcc@:11.3", when="@0.9b:", msg="The binary delivery is compiled with gcc13") + # ROCm dependencies depends_on("hip", type="build") depends_on("llvm-amdgpu", type="build") diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 48049f1a173..3cc3cc7e069 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -73,7 +73,8 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): variant("rocm", default=False, description="Use ROCm") variant("cudnn", default=not is_darwin, description="Use cuDNN", when="+cuda") variant("fbgemm", default=True, description="Use FBGEMM (quantized 8-bit server operators)") - variant("kineto", default=True, description="Use Kineto profiling library", when="@1.8:") + variant("kineto", default=True, description="Use Kineto profiling library", when="@1.8: ~rocm") + variant("kineto", default=False, description="Use Kineto profiling library", when="@1.8: +rocm") variant("magma", default=not is_darwin, description="Use MAGMA", when="+cuda") variant("metal", default=is_darwin, description="Use Metal for Caffe2 iOS build") variant( @@ -90,9 +91,12 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): variant("numpy", default=True, description="Use NumPy") variant("openmp", default=True, description="Use OpenMP for parallel code") variant("qnnpack", default=True, description="Use QNNPACK (quantized 8-bit operators)") - variant("valgrind", default=True, description="Use Valgrind", when="@1.8: platform=linux") - variant("xnnpack", default=True, description="Use XNNPACK") - variant("mkldnn", default=True, description="Use MKLDNN") + variant("valgrind", default=True, description="Use Valgrind", when="@1.8: platform=linux ~rocm") + variant("valgrind", default=False, description="Use Valgrind", when="@1.8: platform=linux +rocm") + variant("xnnpack", default=True, description="Use XNNPACK", when="~rocm") + variant("xnnpack", default=False, description="Use XNNPACK", when="+rocm") + variant("mkldnn", default=True, description="Use MKLDNN", when="~rocm") + variant("mkldnn", default=False, description="Use MKLDNN", when="+rocm") variant("distributed", default=True, description="Use distributed") variant("mpi", default=True, description="Use MPI for Caffe2", when="+distributed") variant("ucc", default=False, description="Use UCC", when="@1.13: +distributed") @@ -117,6 +121,11 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): conflicts("+cuda+rocm") conflicts("+gloo+rocm") + conflicts("+mkldnn+rocm") + conflicts("+valgrind+rocm") + conflicts("+kineto+rocm") + conflicts("+caffe2+rocm") + conflicts("+xnnpack+rocm") conflicts("+rocm", when="@2.3", msg="Rocm doesn't support py-torch 2.3 release") conflicts("+rocm", when="@2.4", msg="Rocm doesn't support py-torch 2.4 release") conflicts("+tensorpipe", when="+rocm ^hip@:5.1", msg="TensorPipe not supported until ROCm 5.2") @@ -305,6 +314,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocblas") depends_on("miopen-hip") depends_on("rocminfo") + depends_on("hipsparselt", when="@2.8:") depends_on("aotriton@0.8.1b", when="@2.5:2.6") depends_on("aotriton@0.9.1b", when="@2.7:") depends_on("composable-kernel@:6.3.2", when="@2.5") @@ -335,6 +345,12 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): sha256="5e56556a5698e6c43d0e7e9e3da6d7d819a4886bcd717e7b8e22ec08414a0b66", when="@2.8.0", ) + # https://github.com/pytorch/pytorch/pull/156486 + patch( + "https://github.com/pytorch/pytorch/commit/a23f4471b952d8cd630b860639e0aaa9be957d60.patch?full_index=1", + sha256="c99622bab1f2bd35674e2ee978a7b8896bb0b8e5d50172c4c60e691a2151ec9f", + when="@2.8.0 +rocm", + ) # https://github.com/pytorch/pytorch/issues/151592 patch("macos_rpath.patch", when="@2.7:") diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index c81d5bd4fe2..8337e1211b4 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -47,31 +47,30 @@ spack: - py-keras2onnx # PyTorch - # Does not yet support Spack-installed ROCm - # - py-botorch - # - py-efficientnet-pytorch - # - py-gpytorch - # - py-kornia - # - py-lightning - # - py-pytorch-gradual-warmup-lr - # - py-pytorch-lightning - # - py-segmentation-models-pytorch - # - py-timm - # - py-torch - # - py-torch-cluster - # - py-torch-geometric - # - py-torch-nvidia-apex - # - py-torch-scatter - # - py-torch-sparse - # - py-torch-spline-conv - # - py-torchaudio - # - py-torchdata - # - py-torchfile - # - py-torchgeo - # - py-torchmetrics - # - py-torchtext - # - py-torchvision - # - py-vector-quantize-pytorch + - py-botorch + - py-efficientnet-pytorch + - py-gpytorch + - py-kornia + - py-lightning + - py-pytorch-gradual-warmup-lr + - py-pytorch-lightning + - py-segmentation-models-pytorch + - py-timm + - py-torch + - py-torch-cluster + - py-torch-geometric + - py-torch-nvidia-apex + - py-torch-scatter + - py-torch-sparse + - py-torch-spline-conv + - py-torchaudio + - py-torchdata + - py-torchfile + - py-torchgeo + - py-torchmetrics + - py-torchtext + - py-torchvision + - py-vector-quantize-pytorch # scikit-learn - py-scikit-learn From 47f737cdd8dd0c3862141d547825419c94dd82fd Mon Sep 17 00:00:00 2001 From: renjithravindrankannath <94420380+renjithravindrankannath@users.noreply.github.com> Date: Fri, 19 Sep 2025 07:46:45 +0000 Subject: [PATCH 04/84] [@spackbot] updating style on behalf of renjithravindrankannath --- .../spack_repo/builtin/packages/py_torch/package.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 3cc3cc7e069..f4ebeb0d055 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -74,7 +74,9 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): variant("cudnn", default=not is_darwin, description="Use cuDNN", when="+cuda") variant("fbgemm", default=True, description="Use FBGEMM (quantized 8-bit server operators)") variant("kineto", default=True, description="Use Kineto profiling library", when="@1.8: ~rocm") - variant("kineto", default=False, description="Use Kineto profiling library", when="@1.8: +rocm") + variant( + "kineto", default=False, description="Use Kineto profiling library", when="@1.8: +rocm" + ) variant("magma", default=not is_darwin, description="Use MAGMA", when="+cuda") variant("metal", default=is_darwin, description="Use Metal for Caffe2 iOS build") variant( @@ -91,8 +93,12 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): variant("numpy", default=True, description="Use NumPy") variant("openmp", default=True, description="Use OpenMP for parallel code") variant("qnnpack", default=True, description="Use QNNPACK (quantized 8-bit operators)") - variant("valgrind", default=True, description="Use Valgrind", when="@1.8: platform=linux ~rocm") - variant("valgrind", default=False, description="Use Valgrind", when="@1.8: platform=linux +rocm") + variant( + "valgrind", default=True, description="Use Valgrind", when="@1.8: platform=linux ~rocm" + ) + variant( + "valgrind", default=False, description="Use Valgrind", when="@1.8: platform=linux +rocm" + ) variant("xnnpack", default=True, description="Use XNNPACK", when="~rocm") variant("xnnpack", default=False, description="Use XNNPACK", when="+rocm") variant("mkldnn", default=True, description="Use MKLDNN", when="~rocm") From e57ab94e700b8d23d3b45ee910e9c3fbb80c6b82 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Mon, 22 Sep 2025 14:29:38 -0700 Subject: [PATCH 05/84] enabling horovod and keras in ci --- stacks/ml-linux-x86_64-rocm/spack.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 8337e1211b4..2449a944431 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -29,7 +29,7 @@ spack: specs: # Horovod - # - py-horovod + - py-horovod # Hugging Face - py-transformers @@ -40,8 +40,8 @@ spack: # Keras - py-keras backend=tensorflow - # - py-keras backend=jax - # - py-keras backend=torch + - py-keras backend=jax + - py-keras backend=torch - py-keras-applications - py-keras-preprocessing - py-keras2onnx From 326901ad53ef3988e8c5704a8a1b0ae96bbad536 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Mon, 22 Sep 2025 23:02:49 -0700 Subject: [PATCH 06/84] Increasing timeout for ck and aotriton --- .ci/gitlab/configs/linux/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.ci/gitlab/configs/linux/ci.yaml b/.ci/gitlab/configs/linux/ci.yaml index 8dee200d543..9135d9f16d3 100644 --- a/.ci/gitlab/configs/linux/ci.yaml +++ b/.ci/gitlab/configs/linux/ci.yaml @@ -10,8 +10,10 @@ ci: - match: - composable-kernel - py-torch + - aotriton - wrf build-job: + timeout: 600 minutes tags: [ "spack", "huge" ] variables: CI_JOB_SIZE: huge From 7a65c3967a02e9cb1dea5e423d03831db1e4887e Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Wed, 24 Sep 2025 16:58:49 -0700 Subject: [PATCH 07/84] Increase timout for ck and aotriton --- .ci/gitlab/configs/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index c96ffb0d283..86c4701387a 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -22,7 +22,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 1200 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true From 6ad618d12bf85ad9bf1bff0b192656380c5e0262 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Thu, 23 Oct 2025 16:35:52 -0700 Subject: [PATCH 08/84] aotriton 0.10b and related changesin py-torchwq --- repos/spack_repo/builtin/packages/aotriton/package.py | 4 ++++ repos/spack_repo/builtin/packages/py_torch/package.py | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 516b5ba679f..701ea50505e 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -18,6 +18,9 @@ class Aotriton(CMakePackage): maintainers("afzpatel", "srekolam", "renjithravindrankannath") license("MIT") + version( + "0.10b", tag="0.10b", commit="6fca155f4deeb8d9529326f7b69f350aeeb93477", submodules=True + ) version( "0.9.2b", tag="0.9.2b", commit="b388d223d8c7213545603e00f6f3148c54d1f525", submodules=True ) @@ -46,6 +49,7 @@ class Aotriton(CMakePackage): depends_on("zlib-api", type="link") depends_on("xz", type="link") depends_on("pkgconfig", type="build") + depends_on("llvm +mlir", when="%gcc") conflicts("^openssl@3.3.0") # https://github.com/ROCm/aotriton/blob/main/README.md?plain=1#L24 diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index f4ebeb0d055..31eb341db93 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -320,9 +320,13 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocblas") depends_on("miopen-hip") depends_on("rocminfo") - depends_on("hipsparselt", when="@2.8:") - depends_on("aotriton@0.8.1b", when="@2.5:2.6") - depends_on("aotriton@0.9.1b", when="@2.7:") + depends_on("hipsparselt", when="@2.8:") + depends_on("aotriton@0.4b", when="@2.3") + depends_on("aotriton@0.6b", when="@2.4") + depends_on("aotriton@0.7b:0.8b", when="@2.5") + depends_on("aotriton@0.8b", when="@2.6") + depends_on("aotriton@0.9b:0.10b", when="@2.7") + depends_on("aotriton@0.10b", when="@2.8") depends_on("composable-kernel@:6.3.2", when="@2.5") depends_on("composable-kernel@6.3.2:", when="@2.6:") depends_on("mpi", when="+mpi") From 9f956ed78afae622b582c03ca2c1d574d740a905 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Thu, 23 Oct 2025 22:08:43 -0700 Subject: [PATCH 09/84] stlye error fix --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 31eb341db93..926a8a42335 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -320,7 +320,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocblas") depends_on("miopen-hip") depends_on("rocminfo") - depends_on("hipsparselt", when="@2.8:") + depends_on("hipsparselt", when="@2.8:") depends_on("aotriton@0.4b", when="@2.3") depends_on("aotriton@0.6b", when="@2.4") depends_on("aotriton@0.7b:0.8b", when="@2.5") From 9f60ff29323773efd96898b102e9da3919f65dd9 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Thu, 23 Oct 2025 22:40:01 -0700 Subject: [PATCH 10/84] fix audit error --- repos/spack_repo/builtin/packages/py_torch/package.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 926a8a42335..8dd658e4e3c 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -321,10 +321,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("miopen-hip") depends_on("rocminfo") depends_on("hipsparselt", when="@2.8:") - depends_on("aotriton@0.4b", when="@2.3") - depends_on("aotriton@0.6b", when="@2.4") - depends_on("aotriton@0.7b:0.8b", when="@2.5") - depends_on("aotriton@0.8b", when="@2.6") + depends_on("aotriton@0.8b", when="@2.5:") depends_on("aotriton@0.9b:0.10b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8") depends_on("composable-kernel@:6.3.2", when="@2.5") From bdf548d14e77da62322b0df8e7f2d8edf60b061a Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 24 Oct 2025 00:36:46 -0700 Subject: [PATCH 11/84] aotriton require specific commit of llvm --- .../builtin/packages/aotriton/package.py | 2 +- .../builtin/packages/aotriton_llvm/package.py | 64 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 repos/spack_repo/builtin/packages/aotriton_llvm/package.py diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 701ea50505e..5b75ce12146 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -49,7 +49,7 @@ class Aotriton(CMakePackage): depends_on("zlib-api", type="link") depends_on("xz", type="link") depends_on("pkgconfig", type="build") - depends_on("llvm +mlir", when="%gcc") + depends_on("aotriton-llvm@20.1.0", when="%gcc @0.9b:") conflicts("^openssl@3.3.0") # https://github.com/ROCm/aotriton/blob/main/README.md?plain=1#L24 diff --git a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py new file mode 100644 index 00000000000..a7395d791ab --- /dev/null +++ b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py @@ -0,0 +1,64 @@ +# Copyright Spack Project Developers. See COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import os +import re +import sys + +from spack_repo.builtin.build_systems.cmake import CMakePackage, generator +from spack_repo.builtin.build_systems.compiler import CompilerPackage +from spack_repo.builtin.build_systems.cuda import CudaPackage + +from spack.package import * + + +class AotritonLlvm(CMakePackage, CudaPackage,CompilerPackage): + """FIXME: Put a proper description of your package here.""" + + homepage = "https://github.com/llvm/llvm-project" + url = "https://github.com/llvm/llvm-project/archive/llvmorg-7.1.0.tar.gz" + git = "https://github.com/llvm/llvm-project" + #url = "https://oaitriton.blob.core.windows.net/public/llvm-builds/llvm-86b69c31-ubuntu-x64.tar.gz" + + version("main", branch="main") + version("20.1.0", commit="86b69c31642e98f8357df62c09d118ad1da4e16a") + generator("ninja") + depends_on("cxx", type="build") + depends_on("c", type="build") + depends_on("cmake@3.13.4:", type="build") + depends_on("python", type="build") + depends_on("z3", type="link") + depends_on("zlib-api", type="link") + depends_on("ncurses+termlib", type="link") + depends_on("libxml2", type="link") + depends_on("py-pybind11") + depends_on("pkgconfig", type="build") + + root_cmakelists_dir = "llvm" + + def _standard_flag(self, *, language, standard): + flags = { + "cxx": {"11": "-std=c++11", "14": "-std=c++14", "17": "-std=c++17"}, + "c": {"99": "-std=c99", "11": "-std=c1x"}, + } + return flags[language][standard] + + def cmake_args(self): + llvm_projects = ["llvm", "mlir"] + args = [ + self.define("LLVM_ENABLE_Z3_SOLVER", "OFF"), + self.define("CMAKE_BUILD_TYPE", "Release"), + self.define("LLVM_REQUIRES_RTTI", True), + self.define("LLVM_ENABLE_LIBXML2", False), + self.define("LLVM_ENABLE_RTTI", "ON"), + self.define("CMAKE_INSTALL_LIBDIR", "lib"), + self.define("CMAKE_CXX_STANDARD", 17), + self.define("LLVM_BUILD_UTILS", "ON"), + self.define("LLVM_TARGETS_TO_BUILD", "host;NVPTX;AMDGPU"), + self.define("MLIR_ENABLE_BINDINGS_PYTHON", "ON"), + self.define("LLVM_ENABLE_TERMINFO", "OFF") + ] + args.append(self.define("LLVM_ENABLE_PROJECTS", llvm_projects)) + return args + From 89f3a2572f003f07b10070b0a0bf8a615baa4ea0 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath <94420380+renjithravindrankannath@users.noreply.github.com> Date: Fri, 24 Oct 2025 07:43:25 +0000 Subject: [PATCH 12/84] [@spackbot] updating style on behalf of renjithravindrankannath --- .../spack_repo/builtin/packages/aotriton_llvm/package.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py index a7395d791ab..3278a8efd9b 100644 --- a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py +++ b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py @@ -13,14 +13,14 @@ from spack.package import * -class AotritonLlvm(CMakePackage, CudaPackage,CompilerPackage): +class AotritonLlvm(CMakePackage, CudaPackage, CompilerPackage): """FIXME: Put a proper description of your package here.""" homepage = "https://github.com/llvm/llvm-project" url = "https://github.com/llvm/llvm-project/archive/llvmorg-7.1.0.tar.gz" git = "https://github.com/llvm/llvm-project" - #url = "https://oaitriton.blob.core.windows.net/public/llvm-builds/llvm-86b69c31-ubuntu-x64.tar.gz" - + # url = "https://oaitriton.blob.core.windows.net/public/llvm-builds/llvm-86b69c31-ubuntu-x64.tar.gz" + version("main", branch="main") version("20.1.0", commit="86b69c31642e98f8357df62c09d118ad1da4e16a") generator("ninja") @@ -57,8 +57,7 @@ def cmake_args(self): self.define("LLVM_BUILD_UTILS", "ON"), self.define("LLVM_TARGETS_TO_BUILD", "host;NVPTX;AMDGPU"), self.define("MLIR_ENABLE_BINDINGS_PYTHON", "ON"), - self.define("LLVM_ENABLE_TERMINFO", "OFF") + self.define("LLVM_ENABLE_TERMINFO", "OFF"), ] args.append(self.define("LLVM_ENABLE_PROJECTS", llvm_projects)) return args - From 349cdbf6615b8a9c792bc8d24ec76b35ca2a386d Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 24 Oct 2025 00:48:22 -0700 Subject: [PATCH 13/84] removing unused imports --- repos/spack_repo/builtin/packages/aotriton_llvm/package.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py index 3278a8efd9b..ebccec849fe 100644 --- a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py +++ b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py @@ -2,10 +2,6 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -import os -import re -import sys - from spack_repo.builtin.build_systems.cmake import CMakePackage, generator from spack_repo.builtin.build_systems.compiler import CompilerPackage from spack_repo.builtin.build_systems.cuda import CudaPackage From 5f599026575a1f37abc37f2955c6f54d26621532 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 24 Oct 2025 01:18:52 -0700 Subject: [PATCH 14/84] audit check error fix --- repos/spack_repo/builtin/packages/aotriton_llvm/package.py | 1 - 1 file changed, 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py index ebccec849fe..cca3748ca28 100644 --- a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py +++ b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py @@ -10,7 +10,6 @@ class AotritonLlvm(CMakePackage, CudaPackage, CompilerPackage): - """FIXME: Put a proper description of your package here.""" homepage = "https://github.com/llvm/llvm-project" url = "https://github.com/llvm/llvm-project/archive/llvmorg-7.1.0.tar.gz" From 0ddd68e62d3d74614e5e47cf9f29e196e13469d8 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 24 Oct 2025 11:26:22 -0700 Subject: [PATCH 15/84] aotriton-llvm update and related changes --- repos/spack_repo/builtin/packages/aotriton/package.py | 7 ++++++- .../builtin/packages/aotriton_llvm/package.py | 10 ++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 5b75ce12146..f0285d30091 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -49,7 +49,12 @@ class Aotriton(CMakePackage): depends_on("zlib-api", type="link") depends_on("xz", type="link") depends_on("pkgconfig", type="build") - depends_on("aotriton-llvm@20.1.0", when="%gcc @0.9b:") + + # build llvm version with mlir with the commit that matches inside the llvm-hash.txt + depends_on("aotriton-llvm@0.10", when="@0.10b") + depends_on("aotriton-llvm@0.9", when="@0.9b:0.9.2b") + depends_on("aotriton-llvm@0.8", when="@0.8:0.8.2b") + conflicts("^openssl@3.3.0") # https://github.com/ROCm/aotriton/blob/main/README.md?plain=1#L24 diff --git a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py index cca3748ca28..d7949722b84 100644 --- a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py +++ b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py @@ -12,12 +12,13 @@ class AotritonLlvm(CMakePackage, CudaPackage, CompilerPackage): homepage = "https://github.com/llvm/llvm-project" - url = "https://github.com/llvm/llvm-project/archive/llvmorg-7.1.0.tar.gz" git = "https://github.com/llvm/llvm-project" - # url = "https://oaitriton.blob.core.windows.net/public/llvm-builds/llvm-86b69c31-ubuntu-x64.tar.gz" + url = "https://github.com/llvm/llvm-project/archive/llvmorg-7.1.0.tar.gz" - version("main", branch="main") - version("20.1.0", commit="86b69c31642e98f8357df62c09d118ad1da4e16a") + version("main", commit="b5cc222d7429fe6f18c787f633d5262fac2e676f") + version("0.10", commit="3c709802d31b5bc5ed3af8284b40593ff39b9eec") + version("0.9", commit="86b69c31642e98f8357df62c09d118ad1da4e16a") + version("0.8", commit="bd9145c8c21334e099d51b3e66f49d51d24931ee") generator("ninja") depends_on("cxx", type="build") depends_on("c", type="build") @@ -29,6 +30,7 @@ class AotritonLlvm(CMakePackage, CudaPackage, CompilerPackage): depends_on("libxml2", type="link") depends_on("py-pybind11") depends_on("pkgconfig", type="build") + depends_on("py-nanobind", when="@0.10") root_cmakelists_dir = "llvm" From 5294a9be1083ef3343a3445553c7c72d65344ee8 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 24 Oct 2025 12:00:05 -0700 Subject: [PATCH 16/84] Adding docstring for aotriton-llvm --- repos/spack_repo/builtin/packages/aotriton_llvm/package.py | 1 + 1 file changed, 1 insertion(+) diff --git a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py index d7949722b84..95c862582bc 100644 --- a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py +++ b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py @@ -10,6 +10,7 @@ class AotritonLlvm(CMakePackage, CudaPackage, CompilerPackage): + """Package for aotriton-llvm: A custom LLVM build for AoTriton.""" homepage = "https://github.com/llvm/llvm-project" git = "https://github.com/llvm/llvm-project" From 3ec901a6436de127a4a7f5f54f5d28aab747b1d5 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Mon, 27 Oct 2025 21:47:37 -0700 Subject: [PATCH 17/84] Aotriton and py-torch dependency fixes --- .../builtin/packages/aotriton/package.py | 19 +++++++++++++++++++ .../builtin/packages/py_torch/package.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index f0285d30091..bf53f605fbb 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -67,6 +67,7 @@ class Aotriton(CMakePackage): depends_on("hsa-rocr-dev", type="build") def patch(self): + src = self.stage.source_path if self.spec.satisfies("^hip"): filter_file( "/opt/rocm/llvm/bin/ld.lld", @@ -74,6 +75,24 @@ def patch(self): "third_party/triton/third_party/amd/backend/compiler.py", string=True, ) + filter_file( + r"LLVM_INCLUDE_DIRS", + f"{self.spec['aotriton-llvm'].prefix}/include", + "third_party/triton/python/setup.py", + string=True, + ) + filter_file( + r"LLVM_LIBRARY_DIR", + f"{self.spec['aotriton-llvm'].prefix}/lib", + "third_party/triton/python/setup.py", + string=True, + ) + filter_file( + r"LLVM_SYSPATH", + f"{self.spec['aotriton-llvm'].prefix}", + "third_party/triton/python/setup.py", + string=True, + ) def setup_build_environment(self, env: EnvironmentModifications) -> None: """Set environment variables used to control the build""" diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 8dd658e4e3c..2d82ff6ab93 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -322,7 +322,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocminfo") depends_on("hipsparselt", when="@2.8:") depends_on("aotriton@0.8b", when="@2.5:") - depends_on("aotriton@0.9b:0.10b", when="@2.7") + depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8") depends_on("composable-kernel@:6.3.2", when="@2.5") depends_on("composable-kernel@6.3.2:", when="@2.6:") From 96aafb6569af0f9b56a5a817d251f94214daf558 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath <94420380+renjithravindrankannath@users.noreply.github.com> Date: Tue, 28 Oct 2025 04:51:35 +0000 Subject: [PATCH 18/84] [@spackbot] updating style on behalf of renjithravindrankannath --- .../builtin/packages/aotriton/package.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index bf53f605fbb..8261b1ebed2 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -76,23 +76,23 @@ def patch(self): string=True, ) filter_file( - r"LLVM_INCLUDE_DIRS", - f"{self.spec['aotriton-llvm'].prefix}/include", - "third_party/triton/python/setup.py", - string=True, - ) + r"LLVM_INCLUDE_DIRS", + f"{self.spec['aotriton-llvm'].prefix}/include", + "third_party/triton/python/setup.py", + string=True, + ) filter_file( - r"LLVM_LIBRARY_DIR", - f"{self.spec['aotriton-llvm'].prefix}/lib", - "third_party/triton/python/setup.py", - string=True, - ) + r"LLVM_LIBRARY_DIR", + f"{self.spec['aotriton-llvm'].prefix}/lib", + "third_party/triton/python/setup.py", + string=True, + ) filter_file( - r"LLVM_SYSPATH", - f"{self.spec['aotriton-llvm'].prefix}", - "third_party/triton/python/setup.py", - string=True, - ) + r"LLVM_SYSPATH", + f"{self.spec['aotriton-llvm'].prefix}", + "third_party/triton/python/setup.py", + string=True, + ) def setup_build_environment(self, env: EnvironmentModifications) -> None: """Set environment variables used to control the build""" From 6b8fe85ca3cdaca51035ed2f2145d49cf387505a Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Mon, 27 Oct 2025 22:11:10 -0700 Subject: [PATCH 19/84] Aotriton-llvm dependency fixes and style fix --- repos/spack_repo/builtin/packages/aotriton/package.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 8261b1ebed2..1f713ca92a5 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -51,9 +51,9 @@ class Aotriton(CMakePackage): depends_on("pkgconfig", type="build") # build llvm version with mlir with the commit that matches inside the llvm-hash.txt - depends_on("aotriton-llvm@0.10", when="@0.10b") - depends_on("aotriton-llvm@0.9", when="@0.9b:0.9.2b") - depends_on("aotriton-llvm@0.8", when="@0.8:0.8.2b") + depends_on("aotriton-llvm@0.10", when="@0.10") + depends_on("aotriton-llvm@0.9", when="@0.9") + depends_on("aotriton-llvm@0.8", when="@0.8") conflicts("^openssl@3.3.0") @@ -67,7 +67,6 @@ class Aotriton(CMakePackage): depends_on("hsa-rocr-dev", type="build") def patch(self): - src = self.stage.source_path if self.spec.satisfies("^hip"): filter_file( "/opt/rocm/llvm/bin/ld.lld", From 07fd5a8b7a334da3513cf534c5c42b8210915579 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Tue, 4 Nov 2025 12:30:06 -0800 Subject: [PATCH 20/84] aotriton 10.0 updates --- .../builtin/packages/aotriton/package.py | 58 +++++++++++++------ .../builtin/packages/py_torch/package.py | 5 +- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 1f713ca92a5..9ac13eaad63 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -74,24 +74,45 @@ def patch(self): "third_party/triton/third_party/amd/backend/compiler.py", string=True, ) - filter_file( - r"LLVM_INCLUDE_DIRS", - f"{self.spec['aotriton-llvm'].prefix}/include", - "third_party/triton/python/setup.py", - string=True, - ) - filter_file( - r"LLVM_LIBRARY_DIR", - f"{self.spec['aotriton-llvm'].prefix}/lib", - "third_party/triton/python/setup.py", - string=True, - ) - filter_file( - r"LLVM_SYSPATH", - f"{self.spec['aotriton-llvm'].prefix}", - "third_party/triton/python/setup.py", - string=True, - ) + + if self.spec.satisfies("@:0.9"): + filter_file( + r"LLVM_INCLUDE_DIRS", + f"{self.spec['aotriton-llvm'].prefix}/include", + "third_party/triton/python/setup.py", + string=True, + ) + filter_file( + r"LLVM_LIBRARY_DIR", + f"{self.spec['aotriton-llvm'].prefix}/lib", + "third_party/triton/python/setup.py", + string=True, + ) + filter_file( + r"LLVM_SYSPATH", + f"{self.spec['aotriton-llvm'].prefix}", + "third_party/triton/python/setup.py", + string=True, + ) + if self.spec.satisfies("@10.0"): + filter_file( + r"LLVM_INCLUDE_DIRS", + f"{self.spec['aotriton-llvm'].prefix}/include", + "third_party/triton/setup.py", + string=True, + ) + filter_file( + r"LLVM_LIBRARY_DIR", + f"{self.spec['aotriton-llvm'].prefix}/lib", + "third_party/triton/setup.py", + string=True, + ) + filter_file( + r"LLVM_SYSPATH", + f"{self.spec['aotriton-llvm'].prefix}", + "third_party/triton/setup.py", + string=True, + ) def setup_build_environment(self, env: EnvironmentModifications) -> None: """Set environment variables used to control the build""" @@ -103,4 +124,5 @@ def setup_build_environment(self, env: EnvironmentModifications) -> None: def cmake_args(self): args = [] args.append(self.define("AOTRITON_GPU_BUILD_TIMEOUT", 0)) + args.append(self.define("AOTRITON_NOIMAGE_MODE","ON")) return args diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 2d82ff6ab93..e645fb63dba 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -101,8 +101,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): ) variant("xnnpack", default=True, description="Use XNNPACK", when="~rocm") variant("xnnpack", default=False, description="Use XNNPACK", when="+rocm") - variant("mkldnn", default=True, description="Use MKLDNN", when="~rocm") - variant("mkldnn", default=False, description="Use MKLDNN", when="+rocm") + variant("mkldnn", default=True, description="Use MKLDNN") variant("distributed", default=True, description="Use distributed") variant("mpi", default=True, description="Use MPI for Caffe2", when="+distributed") variant("ucc", default=False, description="Use UCC", when="@1.13: +distributed") @@ -127,7 +126,6 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): conflicts("+cuda+rocm") conflicts("+gloo+rocm") - conflicts("+mkldnn+rocm") conflicts("+valgrind+rocm") conflicts("+kineto+rocm") conflicts("+caffe2+rocm") @@ -729,7 +727,6 @@ def enable_or_disable(variant, keyword="USE", var=None): enable_or_disable("qnnpack", var="PYTORCH_QNNPACK") enable_or_disable("valgrind") enable_or_disable("xnnpack") - enable_or_disable("mkldnn") enable_or_disable("distributed") enable_or_disable("mpi") enable_or_disable("ucc") From 43a436085c20233f1899c30a01aba680501ea953 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath <94420380+renjithravindrankannath@users.noreply.github.com> Date: Tue, 4 Nov 2025 20:33:53 +0000 Subject: [PATCH 21/84] [@spackbot] updating style on behalf of renjithravindrankannath --- .../builtin/packages/aotriton/package.py | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 9ac13eaad63..0ad41328ff8 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -77,42 +77,42 @@ def patch(self): if self.spec.satisfies("@:0.9"): filter_file( - r"LLVM_INCLUDE_DIRS", - f"{self.spec['aotriton-llvm'].prefix}/include", - "third_party/triton/python/setup.py", - string=True, - ) + r"LLVM_INCLUDE_DIRS", + f"{self.spec['aotriton-llvm'].prefix}/include", + "third_party/triton/python/setup.py", + string=True, + ) filter_file( - r"LLVM_LIBRARY_DIR", - f"{self.spec['aotriton-llvm'].prefix}/lib", - "third_party/triton/python/setup.py", - string=True, - ) + r"LLVM_LIBRARY_DIR", + f"{self.spec['aotriton-llvm'].prefix}/lib", + "third_party/triton/python/setup.py", + string=True, + ) filter_file( - r"LLVM_SYSPATH", - f"{self.spec['aotriton-llvm'].prefix}", - "third_party/triton/python/setup.py", - string=True, - ) + r"LLVM_SYSPATH", + f"{self.spec['aotriton-llvm'].prefix}", + "third_party/triton/python/setup.py", + string=True, + ) if self.spec.satisfies("@10.0"): filter_file( - r"LLVM_INCLUDE_DIRS", - f"{self.spec['aotriton-llvm'].prefix}/include", - "third_party/triton/setup.py", - string=True, - ) + r"LLVM_INCLUDE_DIRS", + f"{self.spec['aotriton-llvm'].prefix}/include", + "third_party/triton/setup.py", + string=True, + ) filter_file( - r"LLVM_LIBRARY_DIR", - f"{self.spec['aotriton-llvm'].prefix}/lib", - "third_party/triton/setup.py", - string=True, - ) + r"LLVM_LIBRARY_DIR", + f"{self.spec['aotriton-llvm'].prefix}/lib", + "third_party/triton/setup.py", + string=True, + ) filter_file( - r"LLVM_SYSPATH", - f"{self.spec['aotriton-llvm'].prefix}", - "third_party/triton/setup.py", - string=True, - ) + r"LLVM_SYSPATH", + f"{self.spec['aotriton-llvm'].prefix}", + "third_party/triton/setup.py", + string=True, + ) def setup_build_environment(self, env: EnvironmentModifications) -> None: """Set environment variables used to control the build""" @@ -124,5 +124,5 @@ def setup_build_environment(self, env: EnvironmentModifications) -> None: def cmake_args(self): args = [] args.append(self.define("AOTRITON_GPU_BUILD_TIMEOUT", 0)) - args.append(self.define("AOTRITON_NOIMAGE_MODE","ON")) + args.append(self.define("AOTRITON_NOIMAGE_MODE", "ON")) return args From 771df65d871cda2cf70c479f176822b9142afb0c Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Thu, 6 Nov 2025 02:33:13 -0800 Subject: [PATCH 22/84] rocm update for py-torch 2.9 --- .../packages/py_torch/Revert-PR159080.patch | 22 +++++++++++++++++++ .../builtin/packages/py_torch/package.py | 4 +++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch diff --git a/repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch b/repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch new file mode 100644 index 00000000000..78fa6eef91b --- /dev/null +++ b/repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch @@ -0,0 +1,22 @@ +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index 018bca837a5..132f9670ff3 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -6,7 +6,7 @@ set(PYTORCH_FOUND_HIP FALSE) + # In the latter case, if /opt/rocm does not exist emit status + # message and return. + if(DEFINED ENV{ROCM_PATH}) +- file(TO_CMAKE_PATH "$ENV{ROCM_PATH}" ROCM_PATH) ++ set(ROCM_PATH $ENV{ROCM_PATH}) + if(NOT EXISTS ${ROCM_PATH}) + message(FATAL_ERROR + "ROCM_PATH environment variable is set to ${ROCM_PATH} but does not exist.\n" +@@ -31,7 +31,7 @@ if(NOT DEFINED ENV{MAGMA_HOME}) + set(MAGMA_HOME ${ROCM_PATH}/magma) + set(ENV{MAGMA_HOME} ${ROCM_PATH}/magma) + else() +- file(TO_CMAKE_PATH "$ENV{MAGMA_HOME}" MAGMA_HOME) ++ set(MAGMA_HOME $ENV{MAGMA_HOME}) + endif() + + # MIOpen isn't a part of HIP-SDK for Windows and hence, may have a different diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 110cdd1f15c..5d37e96116f 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -134,6 +134,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): conflicts("+xnnpack+rocm") conflicts("+rocm", when="@2.3", msg="Rocm doesn't support py-torch 2.3 release") conflicts("+rocm", when="@2.4", msg="Rocm doesn't support py-torch 2.4 release") + conflicts("+rocm", when="@2.8", msg="Rocm doesn't support py-torch 2.8 release") conflicts("+tensorpipe", when="+rocm ^hip@:5.1", msg="TensorPipe not supported until ROCm 5.2") conflicts("+breakpad", when="target=ppc64:") conflicts("+breakpad", when="target=ppc64le:") @@ -332,7 +333,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("hipsparselt", when="@2.8:") depends_on("aotriton@0.8b", when="@2.5:") depends_on("aotriton@0.9.2b", when="@2.7") - depends_on("aotriton@0.10b", when="@2.8") + depends_on("aotriton@0.10b", when="@2.8:") depends_on("composable-kernel@:6.3.2", when="@2.5") depends_on("composable-kernel@6.3.2:", when="@2.6:") depends_on("mpi", when="+mpi") @@ -435,6 +436,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): patch("PR152569-Update-spack-includes-2.5.patch", when="@2.5+rocm") patch("PR152569-Update-spack-includes-2.6.patch", when="@2.6+rocm") patch("PR152569-Update-spack-includes-2.7.patch", when="@2.7+rocm") + patch("Revert-PR159080.patch", when="@2.9+rocm") # https://github.com/pytorch/pytorch/pull/147993 # prevents pytorch from potentially using system version of config.h From 445001dbdda81e19685f90d8d544226be4ae1da4 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Thu, 6 Nov 2025 15:57:51 -0800 Subject: [PATCH 23/84] py-torch requires ck --- stacks/ml-linux-x86_64-rocm/spack.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index a6ceee53c62..87d68d4627f 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -15,8 +15,6 @@ spack: require: ~rocm gl: require: "osmesa" - miopen-hip: - require: ~ck mpi: require: openmpi py-jaxlib: From d256987cacdd9941ed0c4420176fe3df8bbbc896 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 14 Nov 2025 18:08:29 -0800 Subject: [PATCH 24/84] Limiting to py-torch temporarily --- stacks/ml-linux-x86_64-rocm/spack.yaml | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 63aec2524ad..724c274680b 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -45,35 +45,25 @@ spack: # Keras - py-keras backend=tensorflow - - py-keras backend=jax - - py-keras backend=torch - - py-keras-applications - - py-keras-preprocessing - - py-keras2onnx + # - py-keras backend=jax + # - py-keras backend=torch # PyTorch + # Does not yet support Spack-installed ROCm - py-botorch - - py-efficientnet-pytorch - py-gpytorch - py-kornia - py-lightning - - py-pytorch-gradual-warmup-lr - py-pytorch-lightning - py-segmentation-models-pytorch - py-timm - py-torch - - py-torch-cluster - py-torch-geometric - py-torch-nvidia-apex - - py-torch-scatter - - py-torch-sparse - - py-torch-spline-conv - py-torchaudio - py-torchdata - - py-torchfile - py-torchgeo - py-torchmetrics - - py-torchtext - py-torchvision - py-vector-quantize-pytorch From 3434b791c9f9e6778b6d507162d1964ccd65f893 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Fri, 14 Nov 2025 19:00:26 -0800 Subject: [PATCH 25/84] masking py-kornia temporarily --- stacks/ml-linux-x86_64-rocm/spack.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 724c274680b..4bbf46675f8 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -52,7 +52,7 @@ spack: # Does not yet support Spack-installed ROCm - py-botorch - py-gpytorch - - py-kornia + # - py-kornia - py-lightning - py-pytorch-lightning - py-segmentation-models-pytorch From f44564c00c3e0bee2ed972de096065c42c400939 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Sat, 15 Nov 2025 13:01:18 -0800 Subject: [PATCH 26/84] Updating python version and addressing other review comments --- .../builtin/packages/aotriton/package.py | 2 +- .../builtin/packages/py_torch/package.py | 15 +++------------ stacks/ml-linux-x86_64-rocm/spack.yaml | 6 +----- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 0ad41328ff8..ea18ee90702 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -44,7 +44,7 @@ class Aotriton(CMakePackage): depends_on("py-filelock", type=("build", "run")) depends_on("cmake@3.26:", type="build") - depends_on("python@:3.11", type="build") + depends_on("python@3.13:", type="build") depends_on("z3", type="link") depends_on("zlib-api", type="link") depends_on("xz", type="link") diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 5d37e96116f..b9331d333e2 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -74,10 +74,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): variant("rocm", default=False, description="Use ROCm") variant("cudnn", default=not is_darwin, description="Use cuDNN", when="+cuda") variant("fbgemm", default=True, description="Use FBGEMM (quantized 8-bit server operators)") - variant("kineto", default=True, description="Use Kineto profiling library", when="@1.8: ~rocm") - variant( - "kineto", default=False, description="Use Kineto profiling library", when="@1.8: +rocm" - ) + variant("kineto", default=True, description="Use Kineto profiling library", when="@1.8:") variant("magma", default=not is_darwin, description="Use MAGMA", when="+cuda") variant("metal", default=is_darwin, description="Use Metal for Caffe2 iOS build") variant( @@ -94,14 +91,8 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): variant("numpy", default=True, description="Use NumPy") variant("openmp", default=True, description="Use OpenMP for parallel code") variant("qnnpack", default=True, description="Use QNNPACK (quantized 8-bit operators)") - variant( - "valgrind", default=True, description="Use Valgrind", when="@1.8: platform=linux ~rocm" - ) - variant( - "valgrind", default=False, description="Use Valgrind", when="@1.8: platform=linux +rocm" - ) - variant("xnnpack", default=True, description="Use XNNPACK", when="~rocm") - variant("xnnpack", default=False, description="Use XNNPACK", when="+rocm") + variant("valgrind", default=True, description="Use Valgrind", when="@1.8: platform=linux") + variant("xnnpack", default=True, description="Use XNNPACK") variant("mkldnn", default=True, description="Use MKLDNN") variant("distributed", default=True, description="Use distributed") variant("mpi", default=True, description="Use MPI for Caffe2", when="+distributed") diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 4bbf46675f8..cbef4e3fded 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -33,9 +33,6 @@ spack: - "%c,cxx=gcc" specs: - # Horovod - - py-horovod - # Hugging Face - py-transformers @@ -49,10 +46,9 @@ spack: # - py-keras backend=torch # PyTorch - # Does not yet support Spack-installed ROCm - py-botorch - py-gpytorch - # - py-kornia + - py-kornia - py-lightning - py-pytorch-lightning - py-segmentation-models-pytorch From ad5cf5c3660b5dc8fb0f3c02be475c2d3a250600 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Mon, 17 Nov 2025 13:26:48 -0800 Subject: [PATCH 27/84] Correcting aotriton and python dependencies --- repos/spack_repo/builtin/packages/aotriton/package.py | 2 +- repos/spack_repo/builtin/packages/py_torch/package.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index ea18ee90702..502955b16e4 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -44,7 +44,7 @@ class Aotriton(CMakePackage): depends_on("py-filelock", type=("build", "run")) depends_on("cmake@3.26:", type="build") - depends_on("python@3.13:", type="build") + depends_on("python", type="build") depends_on("z3", type="link") depends_on("zlib-api", type="link") depends_on("xz", type="link") diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index b9331d333e2..95e303172f2 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -322,7 +322,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("miopen-hip") depends_on("rocminfo") depends_on("hipsparselt", when="@2.8:") - depends_on("aotriton@0.8b", when="@2.5:") + depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") depends_on("composable-kernel@:6.3.2", when="@2.5") @@ -733,6 +733,7 @@ def enable_or_disable(variant, keyword="USE", var=None): enable_or_disable("qnnpack", var="PYTORCH_QNNPACK") enable_or_disable("valgrind") enable_or_disable("xnnpack") + enable_or_disable("mkldnn") enable_or_disable("distributed") enable_or_disable("mpi") enable_or_disable("ucc") From 47691529ff873948bc4f78e8aea7dc9f0118b815 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Mon, 24 Nov 2025 11:18:41 -0800 Subject: [PATCH 28/84] updating ck dependency --- repos/spack_repo/builtin/packages/py_torch/package.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 95e303172f2..46d3673b21e 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -321,12 +321,11 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocblas") depends_on("miopen-hip") depends_on("rocminfo") + depends_on("composable-kernel") depends_on("hipsparselt", when="@2.8:") depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") - depends_on("composable-kernel@:6.3.2", when="@2.5") - depends_on("composable-kernel@6.3.2:", when="@2.6:") depends_on("mpi", when="+mpi") depends_on("ucc", when="+ucc") depends_on("ucx", when="+ucc") From 509af9fc7107895a35082b18c8b03301be725199 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Tue, 25 Nov 2025 00:41:50 -0800 Subject: [PATCH 29/84] roctracer include path for kineto --- .../py_torch/PR152569-Update-spack-includes-2.7.patch | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch index 4392e00d76a..9ef8449e7a2 100644 --- a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch +++ b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch @@ -1,5 +1,5 @@ diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index d2d23b7..620a89f 100644 +index d2d23b7ab65..620a89f65cb 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1379,13 +1379,6 @@ if(USE_ROCM) @@ -26,7 +26,7 @@ index d2d23b7..620a89f 100644 endif() diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 58c74dd..d3e1ad4 100644 +index 58c74ddda35..88f1ad1ddf5 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -26,12 +26,6 @@ else() @@ -78,7 +78,7 @@ index 58c74dd..d3e1ad4 100644 find_package_and_print_version(amd_comgr REQUIRED) find_package_and_print_version(rocrand REQUIRED) find_package_and_print_version(hiprand REQUIRED) -@@ -171,7 +168,11 @@ if(HIP_FOUND) +@@ -171,10 +168,14 @@ if(HIP_FOUND) if(UNIX) find_package_and_print_version(rccl) find_package_and_print_version(hsa-runtime64 REQUIRED) @@ -89,4 +89,8 @@ index 58c74dd..d3e1ad4 100644 + if(UNIX) # roctx is part of roctracer find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) +- ++ set(ROCTRACER_INCLUDE_DIR $ENV{ROCTRACER_INCLUDE_DIR}) + set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}") + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") From 1c9b69e73ac7b27e2238a6b0e740c3c6e9038466 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Tue, 25 Nov 2025 20:38:40 -0800 Subject: [PATCH 30/84] Updating hip dependency --- .../PR152569-Update-spack-includes-2.7.patch | 10 ++---- .../packages/py_torch/Revert-PR159080.patch | 22 ------------- .../builtin/packages/py_torch/package.py | 32 +++++++++---------- 3 files changed, 19 insertions(+), 45 deletions(-) delete mode 100644 repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch diff --git a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch index 9ef8449e7a2..4392e00d76a 100644 --- a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch +++ b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch @@ -1,5 +1,5 @@ diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index d2d23b7ab65..620a89f65cb 100644 +index d2d23b7..620a89f 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1379,13 +1379,6 @@ if(USE_ROCM) @@ -26,7 +26,7 @@ index d2d23b7ab65..620a89f65cb 100644 endif() diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 58c74ddda35..88f1ad1ddf5 100644 +index 58c74dd..d3e1ad4 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -26,12 +26,6 @@ else() @@ -78,7 +78,7 @@ index 58c74ddda35..88f1ad1ddf5 100644 find_package_and_print_version(amd_comgr REQUIRED) find_package_and_print_version(rocrand REQUIRED) find_package_and_print_version(hiprand REQUIRED) -@@ -171,10 +168,14 @@ if(HIP_FOUND) +@@ -171,7 +168,11 @@ if(HIP_FOUND) if(UNIX) find_package_and_print_version(rccl) find_package_and_print_version(hsa-runtime64 REQUIRED) @@ -89,8 +89,4 @@ index 58c74ddda35..88f1ad1ddf5 100644 + if(UNIX) # roctx is part of roctracer find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) -- -+ set(ROCTRACER_INCLUDE_DIR $ENV{ROCTRACER_INCLUDE_DIR}) - set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}") - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") diff --git a/repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch b/repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch deleted file mode 100644 index 78fa6eef91b..00000000000 --- a/repos/spack_repo/builtin/packages/py_torch/Revert-PR159080.patch +++ /dev/null @@ -1,22 +0,0 @@ -diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 018bca837a5..132f9670ff3 100644 ---- a/cmake/public/LoadHIP.cmake -+++ b/cmake/public/LoadHIP.cmake -@@ -6,7 +6,7 @@ set(PYTORCH_FOUND_HIP FALSE) - # In the latter case, if /opt/rocm does not exist emit status - # message and return. - if(DEFINED ENV{ROCM_PATH}) -- file(TO_CMAKE_PATH "$ENV{ROCM_PATH}" ROCM_PATH) -+ set(ROCM_PATH $ENV{ROCM_PATH}) - if(NOT EXISTS ${ROCM_PATH}) - message(FATAL_ERROR - "ROCM_PATH environment variable is set to ${ROCM_PATH} but does not exist.\n" -@@ -31,7 +31,7 @@ if(NOT DEFINED ENV{MAGMA_HOME}) - set(MAGMA_HOME ${ROCM_PATH}/magma) - set(ENV{MAGMA_HOME} ${ROCM_PATH}/magma) - else() -- file(TO_CMAKE_PATH "$ENV{MAGMA_HOME}" MAGMA_HOME) -+ set(MAGMA_HOME $ENV{MAGMA_HOME}) - endif() - - # MIOpen isn't a part of HIP-SDK for Windows and hence, may have a different diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 46d3673b21e..ba94622260a 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -29,6 +29,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): tags = ["e4s"] version("main", branch="main") + version("2.9.1", tag="v2.9.1", commit="d38164a545b4a4e4e0cf73ce67173f70574890b6") version("2.9.0", tag="v2.9.0", commit="0fabc3ba44823f257e70ce397d989c8de5e362c1") version("2.8.0", tag="v2.8.0", commit="ba56102387ef21a3b04b357e5b183d48f0afefc7") version("2.7.1", tag="v2.7.1", commit="e2d141dbde55c2a4370fac5165b0561b6af4798b") @@ -119,10 +120,6 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): conflicts("+cuda+rocm") conflicts("+gloo+rocm") - conflicts("+valgrind+rocm") - conflicts("+kineto+rocm") - conflicts("+caffe2+rocm") - conflicts("+xnnpack+rocm") conflicts("+rocm", when="@2.3", msg="Rocm doesn't support py-torch 2.3 release") conflicts("+rocm", when="@2.4", msg="Rocm doesn't support py-torch 2.4 release") conflicts("+rocm", when="@2.8", msg="Rocm doesn't support py-torch 2.8 release") @@ -305,7 +302,8 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("valgrind", when="+valgrind") with when("+rocm"): depends_on("hsa-rocr-dev") - depends_on("hip") + depends_on("hip@7.0:", when="@2.9") + depends_on("hip@:6.4", when="@:2.7") depends_on("rccl", when="+nccl") depends_on("rocprim") depends_on("hipcub") @@ -322,7 +320,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("miopen-hip") depends_on("rocminfo") depends_on("composable-kernel") - depends_on("hipsparselt", when="@2.8:") + depends_on("hipsparselt@7.0:", when="@2.9") depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") @@ -353,12 +351,6 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): sha256="5e56556a5698e6c43d0e7e9e3da6d7d819a4886bcd717e7b8e22ec08414a0b66", when="@2.8.0", ) - # https://github.com/pytorch/pytorch/pull/156486 - patch( - "https://github.com/pytorch/pytorch/commit/a23f4471b952d8cd630b860639e0aaa9be957d60.patch?full_index=1", - sha256="c99622bab1f2bd35674e2ee978a7b8896bb0b8e5d50172c4c60e691a2151ec9f", - when="@2.8.0 +rocm", - ) # https://github.com/pytorch/pytorch/issues/151592 patch("macos_rpath.patch", when="@2.7:") @@ -426,7 +418,6 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): patch("PR152569-Update-spack-includes-2.5.patch", when="@2.5+rocm") patch("PR152569-Update-spack-includes-2.6.patch", when="@2.6+rocm") patch("PR152569-Update-spack-includes-2.7.patch", when="@2.7+rocm") - patch("Revert-PR159080.patch", when="@2.9+rocm") # https://github.com/pytorch/pytorch/pull/147993 # prevents pytorch from potentially using system version of config.h @@ -576,6 +567,14 @@ def patch(self): "torch_global_deps PROPERTIES LINKER_LANGUAGE CXX", "caffe2/CMakeLists.txt", ) + if self.spec.satisfies("@2.6:+rocm"): + filter_file( + "find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)", + "find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)\n" + "set(ROCTRACER_INCLUDE_DIR $ENV{ROCTRACER_INCLUDE_DIR})", + "cmake/public/LoadHIP.cmake", + string=True, + ) if self.spec.satisfies("@2.1:2.7+rocm"): filter_file( "${ROCM_INCLUDE_DIRS}/rocm-core/rocm_version.h", @@ -765,9 +764,10 @@ def enable_or_disable(variant, keyword="USE", var=None): env.set("BLAS", "FLAME") env.set("WITH_BLAS", "FLAME") elif self.spec["blas"].name == "intel-oneapi-mkl": - env.set("BLAS", "MKL") - env.set("WITH_BLAS", "mkl") - env.set("INTEL_MKL_DIR", self.spec["mkl"].prefix.mkl.latest) + if "+mkldnn" in self.spec: + env.set("BLAS", "MKL") + env.set("WITH_BLAS", "mkl") + env.set("INTEL_MKL_DIR", self.spec["mkl"].prefix.mkl.latest) elif self.spec["blas"].name == "openblas": env.set("BLAS", "OpenBLAS") env.set("WITH_BLAS", "open") From e9798046a0e99026cb5fa49830632906ebf13b0c Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Tue, 25 Nov 2025 20:58:48 -0800 Subject: [PATCH 31/84] Removing temporary changes from ci/gitlab/configs --- .ci/gitlab/configs/ci.yaml | 2 +- .ci/gitlab/configs/linux/ci.yaml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 14fbb1f4baa..13d71bf94c2 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -22,7 +22,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 1200 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true diff --git a/.ci/gitlab/configs/linux/ci.yaml b/.ci/gitlab/configs/linux/ci.yaml index d4ee463eafc..02f2d4b6ce6 100644 --- a/.ci/gitlab/configs/linux/ci.yaml +++ b/.ci/gitlab/configs/linux/ci.yaml @@ -10,10 +10,8 @@ ci: - match: - composable-kernel - py-torch - - aotriton - wrf build-job: - timeout: 600 minutes tags: [ "spack", "huge" ] variables: CI_JOB_SIZE: huge From ccdeb8b4ccc30c26e0ab8b0183ce5e03a6facd8f Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Wed, 26 Nov 2025 14:57:10 -0800 Subject: [PATCH 32/84] Including roctracer include path for 2.5 as well --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index ba94622260a..47a44928fd7 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -567,7 +567,7 @@ def patch(self): "torch_global_deps PROPERTIES LINKER_LANGUAGE CXX", "caffe2/CMakeLists.txt", ) - if self.spec.satisfies("@2.6:+rocm"): + if self.spec.satisfies("@2.5:+rocm"): filter_file( "find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)", "find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)\n" From 29aa6c7303efbb333c2db789c64078fba1473117 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 4 Dec 2025 14:12:06 -0500 Subject: [PATCH 33/84] CK build requires more than 6 hrs in ci --- .ci/gitlab/configs/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 13d71bf94c2..14fbb1f4baa 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -22,7 +22,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 1200 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true From 7b13b0569b2d79cbcc11cbbefb7479624fd4a101 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Fri, 5 Dec 2025 13:44:32 -0500 Subject: [PATCH 34/84] Correction in timeout for CK build --- .ci/gitlab/configs/ci.yaml | 2 +- .ci/gitlab/configs/linux/ci.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 14fbb1f4baa..13d71bf94c2 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -22,7 +22,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 1200 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true diff --git a/.ci/gitlab/configs/linux/ci.yaml b/.ci/gitlab/configs/linux/ci.yaml index 02f2d4b6ce6..85ca9c4b3c5 100644 --- a/.ci/gitlab/configs/linux/ci.yaml +++ b/.ci/gitlab/configs/linux/ci.yaml @@ -13,6 +13,7 @@ ci: - wrf build-job: tags: [ "spack", "huge" ] + timeout: 600 minutes variables: CI_JOB_SIZE: huge SPACK_BUILD_JOBS: "12" From 377d5d3ba6ab1d62640fcd6451a9e4273804c0a4 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 8 Dec 2025 01:51:20 -0500 Subject: [PATCH 35/84] Adding AMDGPU_TARGETS for older versions for CK --- repos/spack_repo/builtin/packages/composable_kernel/package.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/repos/spack_repo/builtin/packages/composable_kernel/package.py b/repos/spack_repo/builtin/packages/composable_kernel/package.py index a63faf9e610..8ca761d97c2 100644 --- a/repos/spack_repo/builtin/packages/composable_kernel/package.py +++ b/repos/spack_repo/builtin/packages/composable_kernel/package.py @@ -108,6 +108,8 @@ def cmake_args(self): args.append(self.define_from_variant("GPU_TARGETS", "amdgpu_target")) else: args.append(self.define("INSTANCES_ONLY", "ON")) + if self.spec.satisfies("@:6.4"): + args.append(self.define_from_variant("AMDGPU_TARGETS", "amdgpu_target")) if self.run_tests: args.append(self.define("BUILD_TESTING", "ON")) elif self.spec.satisfies("@:6.1"): From 7af2229d36eb6a4283838302f5fa1382855fdf8b Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 8 Dec 2025 02:58:37 -0500 Subject: [PATCH 36/84] Passing amdgpu_target to CK --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 47a44928fd7..0580c86bcae 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -320,6 +320,8 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("miopen-hip") depends_on("rocminfo") depends_on("composable-kernel") + for tgt in ROCmPackage.amdgpu_targets: + depends_on(f"composable-kernel amdgpu_target={tgt}", when=f"amdgpu_target={tgt}") depends_on("hipsparselt@7.0:", when="@2.9") depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") From 92962279262c3e9b837f46a012578307d9ab031d Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 8 Dec 2025 14:58:09 -0500 Subject: [PATCH 37/84] CK needs more than 5hrs --- .ci/gitlab/configs/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 13d71bf94c2..0cb9ebd39de 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -22,7 +22,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 600 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true From 30980bce3f816284024e86771fdc864233f048f7 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 9 Dec 2025 16:53:36 -0500 Subject: [PATCH 38/84] Revert timeout as it doesn't help on expiring token --- .ci/gitlab/configs/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 0cb9ebd39de..13d71bf94c2 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -22,7 +22,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 600 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true From e3be0555af4121e3d37753cdec60d0640d79651c Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 10 Dec 2025 02:07:12 -0500 Subject: [PATCH 39/84] disabling tests for ck --- .ci/gitlab/configs/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 13d71bf94c2..f5f47ea83a3 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -4,6 +4,7 @@ ci: broken-tests-packages: - superlu-dist # srun -n 4 hangs - papyrus + - composable-kernel pipeline-gen: - build-job: From 244316e609726aef408beaa5b5343d67b7c7f681 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 10 Dec 2025 02:15:24 -0500 Subject: [PATCH 40/84] disabling tests for ck under external --- stacks/e4s-rocm-external/spack.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/stacks/e4s-rocm-external/spack.yaml b/stacks/e4s-rocm-external/spack.yaml index 78dccbb01fa..e539c7f6006 100644 --- a/stacks/e4s-rocm-external/spack.yaml +++ b/stacks/e4s-rocm-external/spack.yaml @@ -254,6 +254,7 @@ spack: image: ghcr.io/spack/e4s-rocm-base-x86_64:v6.4.3-1760790880 broken-tests-packages: - paraview + - composable-kernel cdash: build-group: E4S ROCm External From 29eceff44514bdc7899578e500dcc3d4a3c47aa2 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 10 Dec 2025 02:34:13 -0500 Subject: [PATCH 41/84] disabling tests for ck in ml-linux-x86_64-rocm ci --- stacks/ml-linux-x86_64-rocm/spack.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index cbef4e3fded..1fc6c3573be 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -81,6 +81,8 @@ spack: # - py-xgboost ci: + broken-tests-packages: + - composable-kernel pipeline-gen: - build-job: image: From 5b9802e85c61adb58ce9b88a8138f0f1672ae2d0 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 11 Dec 2025 01:33:40 -0500 Subject: [PATCH 42/84] Setting timeout to 600 again to check ck failure --- .ci/gitlab/configs/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index f5f47ea83a3..07135513fef 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -23,7 +23,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 600 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true From 5031bf262e1771c60d1843db063d7749a0151a3a Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Fri, 12 Dec 2025 02:09:52 -0500 Subject: [PATCH 43/84] Temporarily disabling everything else except py-torch --- stacks/ml-linux-x86_64-rocm/spack.yaml | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 1fc6c3573be..bf336fed371 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -46,22 +46,22 @@ spack: # - py-keras backend=torch # PyTorch - - py-botorch - - py-gpytorch - - py-kornia - - py-lightning - - py-pytorch-lightning - - py-segmentation-models-pytorch - - py-timm + #- py-botorch + #- py-gpytorch + #- py-kornia + #- py-lightning + #- py-pytorch-lightning + #- py-segmentation-models-pytorch + #- py-timm - py-torch - - py-torch-geometric - - py-torch-nvidia-apex - - py-torchaudio - - py-torchdata - - py-torchgeo - - py-torchmetrics - - py-torchvision - - py-vector-quantize-pytorch + #- py-torch-geometric + #- py-torch-nvidia-apex + #- py-torchaudio + #- py-torchdata + #- py-torchgeo + #- py-torchmetrics + #- py-torchvision + #- py-vector-quantize-pytorch # scikit-learn - py-scikit-learn From ea1f83542c4ade972cf3a9b3b60c9b041117e596 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 15 Dec 2025 18:11:42 -0500 Subject: [PATCH 44/84] Adjusting timeout at template-level --- .ci/gitlab/configs/ci.yaml | 2 +- .ci/gitlab/configs/linux/ci.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 07135513fef..f5f47ea83a3 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -23,7 +23,7 @@ ci: script:: - - if [ -n "$SPACK_EXTRA_MIRROR" ]; then spack mirror add local "${SPACK_EXTRA_MIRROR}/${SPACK_CI_STACK_NAME}"; fi - spack config blame mirrors - - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 600 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + - - spack --color=always --backtrace ci rebuild -j ${SPACK_BUILD_JOBS} --tests --timeout 300 > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) after_script: - - cat /proc/loadavg || true - cat /proc/meminfo | grep 'MemTotal\|MemFree' || true diff --git a/.ci/gitlab/configs/linux/ci.yaml b/.ci/gitlab/configs/linux/ci.yaml index 85ca9c4b3c5..4b290d1aaa1 100644 --- a/.ci/gitlab/configs/linux/ci.yaml +++ b/.ci/gitlab/configs/linux/ci.yaml @@ -13,7 +13,7 @@ ci: - wrf build-job: tags: [ "spack", "huge" ] - timeout: 600 minutes + timeout: 1200 minutes variables: CI_JOB_SIZE: huge SPACK_BUILD_JOBS: "12" From f0a676c8dd75008bced4ff864d2e5b2b08b62f94 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 16 Dec 2025 13:29:55 -0500 Subject: [PATCH 45/84] =?UTF-8?q?Renewing=20temporary=20creds=20just=20bef?= =?UTF-8?q?ore=20the=20long=E2=80=91running=20CK=20upload=20begins,=20so?= =?UTF-8?q?=20they=20won=E2=80=99t=20age=20out=20mid=E2=80=91push.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .ci/gitlab/configs/ci.yaml | 10 +++++++++ stacks/ml-linux-x86_64-rocm/spack.yaml | 30 +++++++++++++------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index f5f47ea83a3..966b191399c 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -91,6 +91,16 @@ ci: - spack config add 'config:build_stage:["$tempdir/$user/spack-stage", "$user_cache_path/stage"]' - spack config blame config - echo Copying environment specs from ${SPACK_COPY_ONLY_SOURCE} to ${SPACK_COPY_ONLY_DESTINATION} + before_script: + - echo "Refreshing AWS STS session for buildcache publish…" + - CREDS="$(aws sts assume-role \ + --role-arn "${AWS_ROLE_ARN}" \ + --role-session-name "spack-push-${CI_PIPELINE_ID}-${CI_JOB_ID}" \ + --duration-seconds 3600)" + - export AWS_ACCESS_KEY_ID="$(echo "$CREDS" | jq -r .Credentials.AccessKeyId)" + - export AWS_SECRET_ACCESS_KEY="$(echo "$CREDS" | jq -r .Credentials.SecretAccessKey)" + - export AWS_SESSION_TOKEN="$(echo "$CREDS" | jq -r .Credentials.SessionToken)" + - aws sts get-caller-identity # sanity check - spack buildcache sync "${SPACK_COPY_ONLY_SOURCE}" "${SPACK_COPY_ONLY_DESTINATION}" - curl -fLsS https://spack.github.io/keys/spack-public-binary-key.pub -o /tmp/spack-public-binary-key.pub - spack gpg trust /tmp/spack-public-binary-key.pub diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index bf336fed371..1fc6c3573be 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -46,22 +46,22 @@ spack: # - py-keras backend=torch # PyTorch - #- py-botorch - #- py-gpytorch - #- py-kornia - #- py-lightning - #- py-pytorch-lightning - #- py-segmentation-models-pytorch - #- py-timm + - py-botorch + - py-gpytorch + - py-kornia + - py-lightning + - py-pytorch-lightning + - py-segmentation-models-pytorch + - py-timm - py-torch - #- py-torch-geometric - #- py-torch-nvidia-apex - #- py-torchaudio - #- py-torchdata - #- py-torchgeo - #- py-torchmetrics - #- py-torchvision - #- py-vector-quantize-pytorch + - py-torch-geometric + - py-torch-nvidia-apex + - py-torchaudio + - py-torchdata + - py-torchgeo + - py-torchmetrics + - py-torchvision + - py-vector-quantize-pytorch # scikit-learn - py-scikit-learn From 236fcc440cd8c9dfa850d0583205ec841fe81f62 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 16 Dec 2025 15:39:31 -0500 Subject: [PATCH 46/84] =?UTF-8?q?Revert=20"Renewing=20temporary=20creds=20?= =?UTF-8?q?just=20before=20the=20long=E2=80=91running=20CK=20upload"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f0a676c8dd75008bced4ff864d2e5b2b08b62f94. --- .ci/gitlab/configs/ci.yaml | 10 --------- stacks/ml-linux-x86_64-rocm/spack.yaml | 30 +++++++++++++------------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 966b191399c..f5f47ea83a3 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -91,16 +91,6 @@ ci: - spack config add 'config:build_stage:["$tempdir/$user/spack-stage", "$user_cache_path/stage"]' - spack config blame config - echo Copying environment specs from ${SPACK_COPY_ONLY_SOURCE} to ${SPACK_COPY_ONLY_DESTINATION} - before_script: - - echo "Refreshing AWS STS session for buildcache publish…" - - CREDS="$(aws sts assume-role \ - --role-arn "${AWS_ROLE_ARN}" \ - --role-session-name "spack-push-${CI_PIPELINE_ID}-${CI_JOB_ID}" \ - --duration-seconds 3600)" - - export AWS_ACCESS_KEY_ID="$(echo "$CREDS" | jq -r .Credentials.AccessKeyId)" - - export AWS_SECRET_ACCESS_KEY="$(echo "$CREDS" | jq -r .Credentials.SecretAccessKey)" - - export AWS_SESSION_TOKEN="$(echo "$CREDS" | jq -r .Credentials.SessionToken)" - - aws sts get-caller-identity # sanity check - spack buildcache sync "${SPACK_COPY_ONLY_SOURCE}" "${SPACK_COPY_ONLY_DESTINATION}" - curl -fLsS https://spack.github.io/keys/spack-public-binary-key.pub -o /tmp/spack-public-binary-key.pub - spack gpg trust /tmp/spack-public-binary-key.pub diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 1fc6c3573be..bf336fed371 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -46,22 +46,22 @@ spack: # - py-keras backend=torch # PyTorch - - py-botorch - - py-gpytorch - - py-kornia - - py-lightning - - py-pytorch-lightning - - py-segmentation-models-pytorch - - py-timm + #- py-botorch + #- py-gpytorch + #- py-kornia + #- py-lightning + #- py-pytorch-lightning + #- py-segmentation-models-pytorch + #- py-timm - py-torch - - py-torch-geometric - - py-torch-nvidia-apex - - py-torchaudio - - py-torchdata - - py-torchgeo - - py-torchmetrics - - py-torchvision - - py-vector-quantize-pytorch + #- py-torch-geometric + #- py-torch-nvidia-apex + #- py-torchaudio + #- py-torchdata + #- py-torchgeo + #- py-torchmetrics + #- py-torchvision + #- py-vector-quantize-pytorch # scikit-learn - py-scikit-learn From dbc3e9b46cf3a32200e70948b95a2dc2748d738e Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Fri, 19 Dec 2025 14:23:16 -0800 Subject: [PATCH 47/84] Updating hipblaslt dependency --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 0580c86bcae..a48e9fb7064 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -319,9 +319,9 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocblas") depends_on("miopen-hip") depends_on("rocminfo") - depends_on("composable-kernel") for tgt in ROCmPackage.amdgpu_targets: depends_on(f"composable-kernel amdgpu_target={tgt}", when=f"amdgpu_target={tgt}") + depends_on("hipblaslt@7.0:", when="@2.9") depends_on("hipsparselt@7.0:", when="@2.9") depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") From 9f9d5967672497699e315be263eb7aea88b4290c Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Fri, 19 Dec 2025 15:25:08 -0800 Subject: [PATCH 48/84] Updating ck dependency without gpu_target --- repos/spack_repo/builtin/packages/py_torch/package.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index a48e9fb7064..4e236df19b9 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -319,8 +319,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocblas") depends_on("miopen-hip") depends_on("rocminfo") - for tgt in ROCmPackage.amdgpu_targets: - depends_on(f"composable-kernel amdgpu_target={tgt}", when=f"amdgpu_target={tgt}") + depends_on("composable-kernel") depends_on("hipblaslt@7.0:", when="@2.9") depends_on("hipsparselt@7.0:", when="@2.9") depends_on("aotriton@0.8b", when="@2.5:2.6") From ce828b3e2c306a23fcd8d4d76bbff2716e6c48d5 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 24 Dec 2025 19:17:26 -0500 Subject: [PATCH 49/84] py-llvmlite standalone tests in broken-tests-packages --- .ci/gitlab/configs/ci.yaml | 1 + stacks/e4s-rocm-external/spack.yaml | 1 + stacks/ml-linux-x86_64-rocm/spack.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/.ci/gitlab/configs/ci.yaml b/.ci/gitlab/configs/ci.yaml index 26994a20f6f..05da9e656bc 100644 --- a/.ci/gitlab/configs/ci.yaml +++ b/.ci/gitlab/configs/ci.yaml @@ -5,6 +5,7 @@ ci: - superlu-dist # srun -n 4 hangs - papyrus - composable-kernel + - py-llvmlite pipeline-gen: - build-job: diff --git a/stacks/e4s-rocm-external/spack.yaml b/stacks/e4s-rocm-external/spack.yaml index e539c7f6006..69c196448f4 100644 --- a/stacks/e4s-rocm-external/spack.yaml +++ b/stacks/e4s-rocm-external/spack.yaml @@ -255,6 +255,7 @@ spack: broken-tests-packages: - paraview - composable-kernel + - py-llvmlite cdash: build-group: E4S ROCm External diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index bf336fed371..2fe16cb60e4 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -83,6 +83,7 @@ spack: ci: broken-tests-packages: - composable-kernel + - py-llvmlite pipeline-gen: - build-job: image: From f1b28414199825c081abd260f8077f36be0baac9 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 12 Jan 2026 17:53:57 -0500 Subject: [PATCH 50/84] py-llvmlite is incompatible with llvm-amdgpu --- repos/spack_repo/builtin/packages/py_llvmlite/package.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/repos/spack_repo/builtin/packages/py_llvmlite/package.py b/repos/spack_repo/builtin/packages/py_llvmlite/package.py index edee58db435..02674102e71 100644 --- a/repos/spack_repo/builtin/packages/py_llvmlite/package.py +++ b/repos/spack_repo/builtin/packages/py_llvmlite/package.py @@ -58,6 +58,10 @@ class PyLlvmlite(PythonPackage): depends_on("llvm@14", when="@0.41:0.43") depends_on("llvm@11:14", when="@0.40") depends_on("llvm@11", when="@0.37:0.39") + + # py-llvmlite is incompatible with llvm-amdgpu + conflicts("^llvm-amdgpu", msg="py-llvmlite is incompatible with llvm-amdgpu (ROCm LLVM fork)") + for t in [ "arm:", "ppc:", From e4ea8190399679583595a467dadcdbfd0f989d38 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 12 Jan 2026 19:24:38 -0500 Subject: [PATCH 51/84] Prevent building py-llvmlite --- repos/spack_repo/builtin/packages/py_llvmlite/package.py | 4 ---- stacks/ml-linux-x86_64-rocm/spack.yaml | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_llvmlite/package.py b/repos/spack_repo/builtin/packages/py_llvmlite/package.py index 02674102e71..edee58db435 100644 --- a/repos/spack_repo/builtin/packages/py_llvmlite/package.py +++ b/repos/spack_repo/builtin/packages/py_llvmlite/package.py @@ -58,10 +58,6 @@ class PyLlvmlite(PythonPackage): depends_on("llvm@14", when="@0.41:0.43") depends_on("llvm@11:14", when="@0.40") depends_on("llvm@11", when="@0.37:0.39") - - # py-llvmlite is incompatible with llvm-amdgpu - conflicts("^llvm-amdgpu", msg="py-llvmlite is incompatible with llvm-amdgpu (ROCm LLVM fork)") - for t in [ "arm:", "ppc:", diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 2fe16cb60e4..8dc8c0a00eb 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -31,7 +31,11 @@ spack: - amdgpu_target=gfx90a - ~flash_attention - "%c,cxx=gcc" - + py-llvmlite: + buildable: false + externals: + - spec: py-llvmlite@0.46.0 + prefix: /dev/null # Dummy path to prevent building specs: # Hugging Face - py-transformers From d3ca5de47f93d0983ad046c176de2b324b924825 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 13 Jan 2026 01:44:32 -0500 Subject: [PATCH 52/84] skip numba since it requires llvmlite --- stacks/ml-linux-x86_64-rocm/spack.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 8dc8c0a00eb..f58b8e8484c 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -36,6 +36,7 @@ spack: externals: - spec: py-llvmlite@0.46.0 prefix: /dev/null # Dummy path to prevent building + specs: # Hugging Face - py-transformers @@ -93,6 +94,8 @@ spack: image: name: ghcr.io/spack/ubuntu-24.04:v2025-09-15 entrypoint: [''] - + skip-packages: + - py-llvmlite + - py-numba # Also skip numba since it requires llvmlite cdash: build-group: Machine Learning From 0796b50f0d63ba6169cb6fb730473fd61914aadd Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 13 Jan 2026 02:09:11 -0500 Subject: [PATCH 53/84] Revert Dummy path --- stacks/ml-linux-x86_64-rocm/spack.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index f58b8e8484c..6719580c6be 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -31,11 +31,6 @@ spack: - amdgpu_target=gfx90a - ~flash_attention - "%c,cxx=gcc" - py-llvmlite: - buildable: false - externals: - - spec: py-llvmlite@0.46.0 - prefix: /dev/null # Dummy path to prevent building specs: # Hugging Face From 948b9e8dea656a7b745e3fd19fb98f18d479b7f2 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 13 Jan 2026 13:15:57 -0500 Subject: [PATCH 54/84] Rerstricting 6.3 rocm dependency --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- stacks/ml-linux-x86_64-rocm/spack.yaml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 814eb684c96..b5417b6ce18 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -303,7 +303,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): with when("+rocm"): depends_on("hsa-rocr-dev") depends_on("hip@7.0:", when="@2.9") - depends_on("hip@:6.4", when="@:2.7") + depends_on("hip@6.4", when="@:2.7") depends_on("rccl", when="+nccl") depends_on("rocprim") depends_on("hipcub") diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 6719580c6be..5dc08a0c4a6 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -89,8 +89,5 @@ spack: image: name: ghcr.io/spack/ubuntu-24.04:v2025-09-15 entrypoint: [''] - skip-packages: - - py-llvmlite - - py-numba # Also skip numba since it requires llvmlite cdash: build-group: Machine Learning From cf44ac74b324727910a6e7b9676fef3a636327ab Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 13 Jan 2026 21:06:07 -0500 Subject: [PATCH 55/84] py-pandas 2.3.3 and above conflicts with llvm version --- repos/spack_repo/builtin/packages/py_torch/package.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index b5417b6ce18..fafad00f3de 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -180,7 +180,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") depends_on("py-networkx@2.5.1:", when="@2.9:") - depends_on("py-networkx", when="@2:") + depends_on("py-networkx ^py-pandas@:2.3.2", when="@2:") depends_on("py-jinja2", when="@2:") depends_on("py-fsspec@0.8.5:", when="@2.9:") depends_on("py-fsspec", when="@2.1:") @@ -303,7 +303,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): with when("+rocm"): depends_on("hsa-rocr-dev") depends_on("hip@7.0:", when="@2.9") - depends_on("hip@6.4", when="@:2.7") + depends_on("hip@:6.4", when="@:2.7") depends_on("rccl", when="+nccl") depends_on("rocprim") depends_on("hipcub") From 5e525aa83f4b120c2808a79006b574df74172f4e Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 13 Jan 2026 22:31:25 -0500 Subject: [PATCH 56/84] Reverting py-pandas version rule --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index fafad00f3de..814eb684c96 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -180,7 +180,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") depends_on("py-networkx@2.5.1:", when="@2.9:") - depends_on("py-networkx ^py-pandas@:2.3.2", when="@2:") + depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") depends_on("py-fsspec@0.8.5:", when="@2.9:") depends_on("py-fsspec", when="@2.1:") From 51828e47a5db67000ee070eff6cccdd95dd75ac0 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 14 Jan 2026 11:32:33 -0500 Subject: [PATCH 57/84] py-networkx@2.5.1 2.7 to avoid py-llvmlite dependency --- repos/spack_repo/builtin/packages/py_torch/package.py | 1 + 1 file changed, 1 insertion(+) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 814eb684c96..50feebcce5d 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -179,6 +179,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.3:", when="@2.7:") depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") + depends_on("py-networkx@2.5.1", when="@2.7 +rocm") depends_on("py-networkx@2.5.1:", when="@2.9:") depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") From 73526411300be69440409577d8fa3f4c9b667bd6 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 14 Jan 2026 13:12:43 -0500 Subject: [PATCH 58/84] py-llvmlite@0.45 to avoid conflict with llvm version --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 50feebcce5d..20994bceaf0 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -179,7 +179,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.3:", when="@2.7:") depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") - depends_on("py-networkx@2.5.1", when="@2.7 +rocm") + depends_on("py-llvmlite@0.45", when="@2.7 +rocm") depends_on("py-networkx@2.5.1:", when="@2.9:") depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") From 9ec09dd0f0a6973a1775d0c75af379535545c007 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 14 Jan 2026 13:51:53 -0500 Subject: [PATCH 59/84] py-llvmlite@0.44 to avoid conflict with llvm version --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 20994bceaf0..f4389617680 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -179,7 +179,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.3:", when="@2.7:") depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") - depends_on("py-llvmlite@0.45", when="@2.7 +rocm") + depends_on("py-llvmlite@0.44", when="@2.7 +rocm") depends_on("py-networkx@2.5.1:", when="@2.9:") depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") From 42bc46596bdda1dadef3e408b7476ea8055553b7 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 15 Jan 2026 02:51:16 -0500 Subject: [PATCH 60/84] Trigger aotriton build --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index f4389617680..bcc726aec5b 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -324,7 +324,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("hipblaslt@7.0:", when="@2.9") depends_on("hipsparselt@7.0:", when="@2.9") depends_on("aotriton@0.8b", when="@2.5:2.6") - depends_on("aotriton@0.9.2b", when="@2.7") + depends_on("aotriton@0.10b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") depends_on("mpi", when="+mpi") depends_on("ucc", when="+ucc") From 9bfc7358d20a4ebc2e14be22d6d15fb32f6f1e48 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 15 Jan 2026 11:08:04 -0500 Subject: [PATCH 61/84] Trigger aotriton build --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index bcc726aec5b..f4389617680 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -324,7 +324,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("hipblaslt@7.0:", when="@2.9") depends_on("hipsparselt@7.0:", when="@2.9") depends_on("aotriton@0.8b", when="@2.5:2.6") - depends_on("aotriton@0.10b", when="@2.7") + depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") depends_on("mpi", when="+mpi") depends_on("ucc", when="+ucc") From 49f05d29e54eed2cb85f331d8c58c7789ea7ecc2 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 19 Jan 2026 12:10:30 -0800 Subject: [PATCH 62/84] AMDGPU_TARGETS not needed for ck --- repos/spack_repo/builtin/packages/composable_kernel/package.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/repos/spack_repo/builtin/packages/composable_kernel/package.py b/repos/spack_repo/builtin/packages/composable_kernel/package.py index 2c06807ff2e..17296fc5b03 100644 --- a/repos/spack_repo/builtin/packages/composable_kernel/package.py +++ b/repos/spack_repo/builtin/packages/composable_kernel/package.py @@ -112,8 +112,6 @@ def cmake_args(self): args.append(self.define_from_variant("GPU_TARGETS", "amdgpu_target")) else: args.append(self.define("INSTANCES_ONLY", "ON")) - if self.spec.satisfies("@:6.4"): - args.append(self.define_from_variant("AMDGPU_TARGETS", "amdgpu_target")) if self.run_tests: args.append(self.define("BUILD_TESTING", "ON")) elif self.spec.satisfies("@:6.1"): From e5ed8befe049a246150d3bb70fa3586c396fb094 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 22 Jan 2026 00:26:45 -0800 Subject: [PATCH 63/84] Adding aotriton include path --- .../PR152569-Update-spack-includes-2.7.patch | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch index 4392e00d76a..173aabc12aa 100644 --- a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch +++ b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.7.patch @@ -1,5 +1,5 @@ diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index d2d23b7..620a89f 100644 +index d2d23b7ab65..620a89f65cb 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1379,13 +1379,6 @@ if(USE_ROCM) @@ -26,7 +26,7 @@ index d2d23b7..620a89f 100644 endif() diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 58c74dd..d3e1ad4 100644 +index 58c74ddda35..54f96871372 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -26,12 +26,6 @@ else() @@ -78,7 +78,15 @@ index 58c74dd..d3e1ad4 100644 find_package_and_print_version(amd_comgr REQUIRED) find_package_and_print_version(rocrand REQUIRED) find_package_and_print_version(hiprand REQUIRED) -@@ -171,7 +168,11 @@ if(HIP_FOUND) +@@ -157,6 +154,7 @@ if(HIP_FOUND) + find_package_and_print_version(hipcub REQUIRED) + find_package_and_print_version(rocthrust REQUIRED) + find_package_and_print_version(hipsolver REQUIRED) ++ list(APPEND ROCM_INCLUDE_DIRS $ENV{AOTRITON_INSTALLED_PREFIX}/include) + # workaround cmake 4 build issue + if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0") + message(WARNING "Work around hiprtc cmake failure for cmake >= 4") +@@ -171,7 +169,11 @@ if(HIP_FOUND) if(UNIX) find_package_and_print_version(rccl) find_package_and_print_version(hsa-runtime64 REQUIRED) From d85b5219c6747bca5ab5a5dd3c35bba76325d0ce Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 22 Jan 2026 20:40:50 -0800 Subject: [PATCH 64/84] enabling packages depending py-torch --- stacks/ml-linux-x86_64-rocm/spack.yaml | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 5dc08a0c4a6..648a0de6c6d 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -46,22 +46,22 @@ spack: # - py-keras backend=torch # PyTorch - #- py-botorch - #- py-gpytorch - #- py-kornia - #- py-lightning - #- py-pytorch-lightning - #- py-segmentation-models-pytorch - #- py-timm + - py-botorch + - py-gpytorch + - py-kornia + - py-lightning + - py-pytorch-lightning + - py-segmentation-models-pytorch + - py-timm - py-torch - #- py-torch-geometric - #- py-torch-nvidia-apex - #- py-torchaudio - #- py-torchdata - #- py-torchgeo - #- py-torchmetrics - #- py-torchvision - #- py-vector-quantize-pytorch + - py-torch-geometric + - py-torch-nvidia-apex + - py-torchaudio + - py-torchdata + - py-torchgeo + - py-torchmetrics + - py-torchvision + - py-vector-quantize-pytorch # scikit-learn - py-scikit-learn From b5ddb668a61cefe1191eec96de96e1f75893000e Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Fri, 23 Jan 2026 18:02:04 -0500 Subject: [PATCH 65/84] Disable py-torchaudio and py-torchvision --- stacks/ml-linux-x86_64-rocm/spack.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index 648a0de6c6d..eeafb2ab831 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -56,11 +56,11 @@ spack: - py-torch - py-torch-geometric - py-torch-nvidia-apex - - py-torchaudio + # - py-torchaudio - py-torchdata - py-torchgeo - py-torchmetrics - - py-torchvision + # - py-torchvision - py-vector-quantize-pytorch # scikit-learn From a64fcb96e789593a9951a43b76cf5e19b7254469 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 27 Jan 2026 03:44:16 -0500 Subject: [PATCH 66/84] passing gpu_target to hipblaslt and limiting py-llvmlite dependency --- .../builtin/packages/py_torch/package.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index f4389617680..f9e214b158e 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -179,7 +179,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.3:", when="@2.7:") depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") - depends_on("py-llvmlite@0.44", when="@2.7 +rocm") + depends_on("py-llvmlite@0.44", when="@2.5:2.7 +rocm") depends_on("py-networkx@2.5.1:", when="@2.9:") depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") @@ -319,9 +319,23 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocfft") depends_on("rocblas") depends_on("miopen-hip") + for target in ROCmPackage.amdgpu_targets: + depends_on( + f"composable-kernel amdgpu_target={target}", + when=f"amdgpu_target={target}" + ) + # This constraint applies to ANY hipblaslt in the dependency tree + # including the one used by miopen-hip + depends_on( + f"hipblaslt amdgpu_target={target}", + when=f"amdgpu_target={target}" + ) + # Ensure hipblaslt version for 2.9+ + depends_on( + f"hipblaslt@7.0: amdgpu_target={target}", + when=f"@2.9 amdgpu_target={target}" + ) depends_on("rocminfo") - depends_on("composable-kernel") - depends_on("hipblaslt@7.0:", when="@2.9") depends_on("hipsparselt@7.0:", when="@2.9") depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") From b2a060130eb928fcdcae85385ca76dad4d939528 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath <94420380+renjithravindrankannath@users.noreply.github.com> Date: Tue, 27 Jan 2026 08:47:57 +0000 Subject: [PATCH 67/84] [@spackbot] updating style on behalf of renjithravindrankannath --- repos/spack_repo/builtin/packages/nmad/package.py | 3 ++- .../spack_repo/builtin/packages/padicotm/package.py | 3 ++- repos/spack_repo/builtin/packages/pioman/package.py | 3 ++- repos/spack_repo/builtin/packages/puk/package.py | 1 + repos/spack_repo/builtin/packages/pukabi/package.py | 3 ++- .../spack_repo/builtin/packages/py_torch/package.py | 13 +++---------- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/repos/spack_repo/builtin/packages/nmad/package.py b/repos/spack_repo/builtin/packages/nmad/package.py index 6d7d5f36030..4fe07e8806c 100644 --- a/repos/spack_repo/builtin/packages/nmad/package.py +++ b/repos/spack_repo/builtin/packages/nmad/package.py @@ -3,9 +3,10 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) from spack_repo.builtin.build_systems.autotools import AutotoolsPackage -from spack.package import * from spack_repo.builtin.packages.puk.package import Puk +from spack.package import * + class Nmad(AutotoolsPackage): """NewMadeleine communication library. diff --git a/repos/spack_repo/builtin/packages/padicotm/package.py b/repos/spack_repo/builtin/packages/padicotm/package.py index e82337c41ed..4cce7866440 100644 --- a/repos/spack_repo/builtin/packages/padicotm/package.py +++ b/repos/spack_repo/builtin/packages/padicotm/package.py @@ -3,9 +3,10 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) from spack_repo.builtin.build_systems.autotools import AutotoolsPackage -from spack.package import * from spack_repo.builtin.packages.puk.package import Puk +from spack.package import * + class Padicotm(AutotoolsPackage): """PadicoTM communication framework and launcher. diff --git a/repos/spack_repo/builtin/packages/pioman/package.py b/repos/spack_repo/builtin/packages/pioman/package.py index bd7b4dcf9fd..7c1c1fd4c03 100644 --- a/repos/spack_repo/builtin/packages/pioman/package.py +++ b/repos/spack_repo/builtin/packages/pioman/package.py @@ -3,9 +3,10 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) from spack_repo.builtin.build_systems.autotools import AutotoolsPackage -from spack.package import * from spack_repo.builtin.packages.puk.package import Puk +from spack.package import * + class Pioman(AutotoolsPackage): """PIOMan I/O manager. diff --git a/repos/spack_repo/builtin/packages/puk/package.py b/repos/spack_repo/builtin/packages/puk/package.py index 9f9a91bb218..c93f76f3392 100644 --- a/repos/spack_repo/builtin/packages/puk/package.py +++ b/repos/spack_repo/builtin/packages/puk/package.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) from spack_repo.builtin.build_systems.autotools import AutotoolsPackage + from spack.package import * diff --git a/repos/spack_repo/builtin/packages/pukabi/package.py b/repos/spack_repo/builtin/packages/pukabi/package.py index c15abddeea9..690e0c53b56 100644 --- a/repos/spack_repo/builtin/packages/pukabi/package.py +++ b/repos/spack_repo/builtin/packages/pukabi/package.py @@ -3,9 +3,10 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) from spack_repo.builtin.build_systems.autotools import AutotoolsPackage -from spack.package import * from spack_repo.builtin.packages.puk.package import Puk +from spack.package import * + class Pukabi(AutotoolsPackage): """PukABI: ABI manager for PadicoTM. diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index f9e214b158e..5167b2eaafb 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -320,20 +320,13 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("rocblas") depends_on("miopen-hip") for target in ROCmPackage.amdgpu_targets: - depends_on( - f"composable-kernel amdgpu_target={target}", - when=f"amdgpu_target={target}" - ) + depends_on(f"composable-kernel amdgpu_target={target}", when=f"amdgpu_target={target}") # This constraint applies to ANY hipblaslt in the dependency tree # including the one used by miopen-hip - depends_on( - f"hipblaslt amdgpu_target={target}", - when=f"amdgpu_target={target}" - ) + depends_on(f"hipblaslt amdgpu_target={target}", when=f"amdgpu_target={target}") # Ensure hipblaslt version for 2.9+ depends_on( - f"hipblaslt@7.0: amdgpu_target={target}", - when=f"@2.9 amdgpu_target={target}" + f"hipblaslt@7.0: amdgpu_target={target}", when=f"@2.9 amdgpu_target={target}" ) depends_on("rocminfo") depends_on("hipsparselt@7.0:", when="@2.9") From e49d3429c37cebb9c5aaebd9d8e4d62051bf1446 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 27 Jan 2026 13:21:12 -0500 Subject: [PATCH 68/84] aotriton release updates --- repos/spack_repo/builtin/packages/aotriton/package.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 502955b16e4..394c59bdc47 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -18,6 +18,12 @@ class Aotriton(CMakePackage): maintainers("afzpatel", "srekolam", "renjithravindrankannath") license("MIT") + version( + "0.11.1b", tag="0.11.1b", commit="98371989e8a23267e284c94e95156a139e4b33c4", submodules=True + ) + version( + "0.11b", tag="0.11b", commit="972223c501ffc22068bb035ac5d64cf54318d895", submodules=True + ) version( "0.10b", tag="0.10b", commit="6fca155f4deeb8d9529326f7b69f350aeeb93477", submodules=True ) @@ -94,7 +100,7 @@ def patch(self): "third_party/triton/python/setup.py", string=True, ) - if self.spec.satisfies("@10.0"): + if self.spec.satisfies("@0.10:"): filter_file( r"LLVM_INCLUDE_DIRS", f"{self.spec['aotriton-llvm'].prefix}/include", From 9df3fc4bf326819d685af5017fa81e18874c01a8 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath <94420380+renjithravindrankannath@users.noreply.github.com> Date: Tue, 27 Jan 2026 18:24:55 +0000 Subject: [PATCH 69/84] [@spackbot] updating style on behalf of renjithravindrankannath --- repos/spack_repo/builtin/packages/aotriton/package.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/aotriton/package.py b/repos/spack_repo/builtin/packages/aotriton/package.py index 394c59bdc47..c2975789320 100644 --- a/repos/spack_repo/builtin/packages/aotriton/package.py +++ b/repos/spack_repo/builtin/packages/aotriton/package.py @@ -19,7 +19,10 @@ class Aotriton(CMakePackage): license("MIT") version( - "0.11.1b", tag="0.11.1b", commit="98371989e8a23267e284c94e95156a139e4b33c4", submodules=True + "0.11.1b", + tag="0.11.1b", + commit="98371989e8a23267e284c94e95156a139e4b33c4", + submodules=True, ) version( "0.11b", tag="0.11b", commit="972223c501ffc22068bb035ac5d64cf54318d895", submodules=True From ca6c4fa0a9a24012d54a5ac7a905b5e2fa869b9e Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 27 Jan 2026 23:12:20 -0500 Subject: [PATCH 70/84] Updating aotriton dependency --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 5167b2eaafb..c9a5d31571e 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -330,7 +330,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): ) depends_on("rocminfo") depends_on("hipsparselt@7.0:", when="@2.9") - depends_on("aotriton@0.8b", when="@2.5:2.6") + depends_on("aotriton@0.8.1b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") depends_on("mpi", when="+mpi") From 33a1d310becd2c01d35c9533ca1cb04949cc2940 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 28 Jan 2026 14:01:10 -0500 Subject: [PATCH 71/84] Updating 2.5 patch with aotriton path --- .../py_torch/PR152569-Update-spack-includes-2.5.patch | 5 +++-- stacks/ml-linux-x86_64-rocm/spack.yaml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch index 2e7a80bcbe8..dd2fa02952e 100644 --- a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch +++ b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch @@ -25,10 +25,10 @@ index 9be7f37..39d0f24 100644 endif() diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 1c0d3a2..e0de4b1 100644 +index 1c0d3a2..13f2003 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake -@@ -167,6 +167,10 @@ if(HIP_FOUND) +@@ -167,6 +167,11 @@ if(HIP_FOUND) find_package_and_print_version(hipsolver REQUIRED) find_package_and_print_version(hiprtc REQUIRED) @@ -36,6 +36,7 @@ index 1c0d3a2..e0de4b1 100644 + list(APPEND ROCM_INCLUDE ${rocprim_INCLUDE_DIR}) + list(APPEND ROCM_INCLUDE ${hipcub_INCLUDE_DIR}) + list(APPEND ROCM_INCLUDE ${rocRAND_INCLUDE_DIR}) ++ list(APPEND ROCM_INCLUDE_DIRS $ENV{AOTRITON_INSTALLED_PREFIX}/include) find_library(PYTORCH_HIP_LIBRARIES amdhip64 HINTS ${ROCM_PATH}/lib) # TODO: miopen_LIBRARIES should return fullpath to the library file, diff --git a/stacks/ml-linux-x86_64-rocm/spack.yaml b/stacks/ml-linux-x86_64-rocm/spack.yaml index eeafb2ab831..2cf01c3d96b 100644 --- a/stacks/ml-linux-x86_64-rocm/spack.yaml +++ b/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -55,7 +55,7 @@ spack: - py-timm - py-torch - py-torch-geometric - - py-torch-nvidia-apex + # - py-torch-nvidia-apex # - py-torchaudio - py-torchdata - py-torchgeo From 22e052f818f083295012d59d6f84271361995810 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 28 Jan 2026 17:03:15 -0500 Subject: [PATCH 72/84] Revert aotriton dependency change --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index c9a5d31571e..5167b2eaafb 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -330,7 +330,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): ) depends_on("rocminfo") depends_on("hipsparselt@7.0:", when="@2.9") - depends_on("aotriton@0.8.1b", when="@2.5:2.6") + depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") depends_on("mpi", when="+mpi") From 9cd7c1aa8be785997a3e4954285560a618864a61 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 29 Jan 2026 01:23:57 -0500 Subject: [PATCH 73/84] Updating aotriton path for 2.5 --- .../py_torch/PR152569-Update-spack-includes-2.5.patch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch index dd2fa02952e..2c35aafac2f 100644 --- a/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch +++ b/repos/spack_repo/builtin/packages/py_torch/PR152569-Update-spack-includes-2.5.patch @@ -25,7 +25,7 @@ index 9be7f37..39d0f24 100644 endif() diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 1c0d3a2..13f2003 100644 +index 1c0d3a2..83f9f9d 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -167,6 +167,11 @@ if(HIP_FOUND) @@ -36,7 +36,7 @@ index 1c0d3a2..13f2003 100644 + list(APPEND ROCM_INCLUDE ${rocprim_INCLUDE_DIR}) + list(APPEND ROCM_INCLUDE ${hipcub_INCLUDE_DIR}) + list(APPEND ROCM_INCLUDE ${rocRAND_INCLUDE_DIR}) -+ list(APPEND ROCM_INCLUDE_DIRS $ENV{AOTRITON_INSTALLED_PREFIX}/include) ++ list(APPEND ROCM_INCLUDE $ENV{AOTRITON_INSTALLED_PREFIX}/include) find_library(PYTORCH_HIP_LIBRARIES amdhip64 HINTS ${ROCM_PATH}/lib) # TODO: miopen_LIBRARIES should return fullpath to the library file, From 501a60ba4e24ee989351d763e44e584bfdf1922e Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Mon, 2 Feb 2026 18:19:42 -0500 Subject: [PATCH 74/84] py-llvmlite 0.46.0 requires hwloc without rocm --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 5167b2eaafb..aca9272783e 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -179,7 +179,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.3:", when="@2.7:") depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") - depends_on("py-llvmlite@0.44", when="@2.5:2.7 +rocm") + depends_on("hwloc~rocm", when="@2.5:2.7") depends_on("py-networkx@2.5.1:", when="@2.9:") depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") From 5b3b323df0b408b0bad193e21573731acb23ce70 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 3 Feb 2026 15:50:10 -0500 Subject: [PATCH 75/84] Revert "py-llvmlite 0.46.0 requires hwloc without rocm" This reverts commit 501a60ba4e24ee989351d763e44e584bfdf1922e. --- repos/spack_repo/builtin/packages/py_torch/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index aca9272783e..5167b2eaafb 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -179,7 +179,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.3:", when="@2.7:") depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") - depends_on("hwloc~rocm", when="@2.5:2.7") + depends_on("py-llvmlite@0.44", when="@2.5:2.7 +rocm") depends_on("py-networkx@2.5.1:", when="@2.9:") depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") From 426f2b924a0ee2e455b20ac801e7a7ce1e40ab5f Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 3 Feb 2026 18:53:56 -0500 Subject: [PATCH 76/84] py-llvmlite 0.46 which requires llvm 20 create conflict with hwloc 2.12 with older rocm versions --- repos/spack_repo/builtin/packages/hwloc/package.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/hwloc/package.py b/repos/spack_repo/builtin/packages/hwloc/package.py index 65ad3db3056..b1dfe2500de 100644 --- a/repos/spack_repo/builtin/packages/hwloc/package.py +++ b/repos/spack_repo/builtin/packages/hwloc/package.py @@ -135,7 +135,8 @@ class Hwloc(AutotoolsPackage, CudaPackage, ROCmPackage): depends_on("mpi", when="+netloc") with when("+rocm"): - depends_on("rocm-smi-lib") + depends_on("rocm-smi-lib@7.0:", when="@2.12.2:") + depends_on("rocm-smi-lib@:6.4", when="@:2.11.1") depends_on("rocm-opencl", when="+opencl") # Avoid a circular dependency since the openmp # variant of llvm-amdgpu depends on hwloc. From 97e3ac73cb06e212357dcea3dfcf9894fa31a7d5 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 4 Feb 2026 01:18:59 -0500 Subject: [PATCH 77/84] py-llvmlite 0.46 which requires llvm 20 create conflict with hwloc 2.12 with older rocm versions --- repos/spack_repo/builtin/packages/hwloc/package.py | 3 +-- repos/spack_repo/builtin/packages/py_torch/package.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/hwloc/package.py b/repos/spack_repo/builtin/packages/hwloc/package.py index b1dfe2500de..9ab963dfdff 100644 --- a/repos/spack_repo/builtin/packages/hwloc/package.py +++ b/repos/spack_repo/builtin/packages/hwloc/package.py @@ -135,8 +135,7 @@ class Hwloc(AutotoolsPackage, CudaPackage, ROCmPackage): depends_on("mpi", when="+netloc") with when("+rocm"): - depends_on("rocm-smi-lib@7.0:", when="@2.12.2:") - depends_on("rocm-smi-lib@:6.4", when="@:2.11.1") + depends_on("rocm-smi-lib@7.0:") depends_on("rocm-opencl", when="+opencl") # Avoid a circular dependency since the openmp # variant of llvm-amdgpu depends on hwloc. diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 5167b2eaafb..e08738a4537 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -179,7 +179,6 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("py-sympy@1.13.3:", when="@2.7:") depends_on("py-sympy@1.13.1", when="@2.5:2.6") depends_on("py-sympy", when="@2:") - depends_on("py-llvmlite@0.44", when="@2.5:2.7 +rocm") depends_on("py-networkx@2.5.1:", when="@2.9:") depends_on("py-networkx", when="@2:") depends_on("py-jinja2", when="@2:") From f6b12b59ed2a01f286b6de9ff47e027e30478db6 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Fri, 6 Feb 2026 13:05:39 -0500 Subject: [PATCH 78/84] Update for 2.10 on rocm --- repos/spack_repo/builtin/packages/py_torch/package.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index 8f38e62f937..c2f224b1b59 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -306,7 +306,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on("valgrind", when="+valgrind") with when("+rocm"): depends_on("hsa-rocr-dev") - depends_on("hip@7.0:", when="@2.9") + depends_on("hip@7.0:", when="@2.9:") depends_on("hip@:6.4", when="@:2.7") depends_on("rccl", when="+nccl") depends_on("rocprim") @@ -329,10 +329,10 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): depends_on(f"hipblaslt amdgpu_target={target}", when=f"amdgpu_target={target}") # Ensure hipblaslt version for 2.9+ depends_on( - f"hipblaslt@7.0: amdgpu_target={target}", when=f"@2.9 amdgpu_target={target}" + f"hipblaslt@7.0: amdgpu_target={target}", when=f"@2.9: amdgpu_target={target}" ) depends_on("rocminfo") - depends_on("hipsparselt@7.0:", when="@2.9") + depends_on("hipsparselt@7.0:", when="@2.9:") depends_on("aotriton@0.8b", when="@2.5:2.6") depends_on("aotriton@0.9.2b", when="@2.7") depends_on("aotriton@0.10b", when="@2.8:") From 34c0eb4e50e3c4aa235aca24d3d6d7cb276d6c29 Mon Sep 17 00:00:00 2001 From: Renjith Ravindran Date: Sun, 8 Feb 2026 23:39:42 -0800 Subject: [PATCH 79/84] Temporarily reverting mkldnn check to verify --- repos/spack_repo/builtin/packages/py_torch/package.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torch/package.py b/repos/spack_repo/builtin/packages/py_torch/package.py index c2f224b1b59..510408b1987 100644 --- a/repos/spack_repo/builtin/packages/py_torch/package.py +++ b/repos/spack_repo/builtin/packages/py_torch/package.py @@ -776,10 +776,9 @@ def enable_or_disable(variant, keyword="USE", var=None): env.set("BLAS", "FLAME") env.set("WITH_BLAS", "FLAME") elif self.spec["blas"].name == "intel-oneapi-mkl": - if "+mkldnn" in self.spec: - env.set("BLAS", "MKL") - env.set("WITH_BLAS", "mkl") - env.set("INTEL_MKL_DIR", self.spec["mkl"].prefix.mkl.latest) + env.set("BLAS", "MKL") + env.set("WITH_BLAS", "mkl") + env.set("INTEL_MKL_DIR", self.spec["mkl"].prefix.mkl.latest) elif self.spec["blas"].name == "openblas": env.set("BLAS", "OpenBLAS") env.set("WITH_BLAS", "open") From fcffa060265a61006a855c6824a026506aa1df17 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Tue, 10 Feb 2026 21:54:19 -0500 Subject: [PATCH 80/84] py-torchvision requires rocm math lib paths indirectly when py-torch is built with rocm --- .../builtin/packages/py_torchvision/package.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/repos/spack_repo/builtin/packages/py_torchvision/package.py b/repos/spack_repo/builtin/packages/py_torchvision/package.py index 4afaa9895c7..073a4580c34 100644 --- a/repos/spack_repo/builtin/packages/py_torchvision/package.py +++ b/repos/spack_repo/builtin/packages/py_torchvision/package.py @@ -197,6 +197,18 @@ def setup_build_environment(self, env: EnvironmentModifications) -> None: query = self.spec[dep.name] include.extend(query.headers.directories) library.extend(query.libs.directories) + # PyTorch headers include rocthrust, rocprim, hipsparse, hipblas, hipblas-common, + # hipblaslt and hipsolver headers; when building with ROCm we need these headers + # in the include path (py-torch depends on these headers, but it is not a direct + # link dep of torchvision). + if "^py-torch+rocm" in self.spec: + include.extend(self.spec["rocthrust"].headers.directories) + include.extend(self.spec["rocprim"].headers.directories) + include.extend(self.spec["hipsparse"].headers.directories) + include.extend(self.spec["hipblas"].headers.directories) + include.extend(self.spec["hipblas-common"].headers.directories) + include.extend(self.spec["hipblaslt"].headers.directories) + include.extend(self.spec["hipsolver"].headers.directories) # CONTRIBUTING.md says to use TORCHVISION_INCLUDE and TORCHVISION_LIBRARY, but # these do not work for older releases. Build uses a mix of Spack's compiler wrapper From f7b686f6b310799874847c49754f2775dd944296 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Wed, 11 Feb 2026 23:36:36 -0500 Subject: [PATCH 81/84] Removing unwanted line in aotriton-llvm --- repos/spack_repo/builtin/packages/aotriton_llvm/package.py | 1 - 1 file changed, 1 deletion(-) diff --git a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py index 43775ef3beb..c23460c6f10 100644 --- a/repos/spack_repo/builtin/packages/aotriton_llvm/package.py +++ b/repos/spack_repo/builtin/packages/aotriton_llvm/package.py @@ -15,7 +15,6 @@ class AotritonLlvm(CMakePackage, CudaPackage, CompilerPackage): homepage = "https://github.com/llvm/llvm-project" git = "https://github.com/llvm/llvm-project" url = "https://github.com/llvm/llvm-project/archive/llvmorg-7.1.0.tar.gz" - tags = ["rocm"] maintainers("afzpatel", "srekolam", "renjithravindrankannath") From 3ba2d6aac7abb588c5db91db21f528aa101901da Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Thu, 12 Feb 2026 00:35:07 -0500 Subject: [PATCH 82/84] Temporarily reverting math lib include path to test --- .../builtin/packages/py_torchvision/package.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torchvision/package.py b/repos/spack_repo/builtin/packages/py_torchvision/package.py index 073a4580c34..4afaa9895c7 100644 --- a/repos/spack_repo/builtin/packages/py_torchvision/package.py +++ b/repos/spack_repo/builtin/packages/py_torchvision/package.py @@ -197,18 +197,6 @@ def setup_build_environment(self, env: EnvironmentModifications) -> None: query = self.spec[dep.name] include.extend(query.headers.directories) library.extend(query.libs.directories) - # PyTorch headers include rocthrust, rocprim, hipsparse, hipblas, hipblas-common, - # hipblaslt and hipsolver headers; when building with ROCm we need these headers - # in the include path (py-torch depends on these headers, but it is not a direct - # link dep of torchvision). - if "^py-torch+rocm" in self.spec: - include.extend(self.spec["rocthrust"].headers.directories) - include.extend(self.spec["rocprim"].headers.directories) - include.extend(self.spec["hipsparse"].headers.directories) - include.extend(self.spec["hipblas"].headers.directories) - include.extend(self.spec["hipblas-common"].headers.directories) - include.extend(self.spec["hipblaslt"].headers.directories) - include.extend(self.spec["hipsolver"].headers.directories) # CONTRIBUTING.md says to use TORCHVISION_INCLUDE and TORCHVISION_LIBRARY, but # these do not work for older releases. Build uses a mix of Spack's compiler wrapper From b221734a9b1d17272a268810f07ada1d0ecd7b3d Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Fri, 20 Feb 2026 13:48:08 -0500 Subject: [PATCH 83/84] libtorch_hip.so needs aotriton and hip libs at runtime --- .../packages/py_torchvision/package.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/repos/spack_repo/builtin/packages/py_torchvision/package.py b/repos/spack_repo/builtin/packages/py_torchvision/package.py index 4afaa9895c7..9370f87e0a0 100644 --- a/repos/spack_repo/builtin/packages/py_torchvision/package.py +++ b/repos/spack_repo/builtin/packages/py_torchvision/package.py @@ -198,6 +198,32 @@ def setup_build_environment(self, env: EnvironmentModifications) -> None: include.extend(query.headers.directories) library.extend(query.libs.directories) + # PyTorch headers include rocthrust, rocprim, hipsparse, hipblas, hipblas-common, + # hipblaslt and hipsolver headers; when building with ROCm we need these in the + # include path (py-torch depends on them, but they are not direct link deps of + # torchvision). Only add paths for packages that are in the spec to avoid KeyError. + if "^py-torch+rocm" in self.spec: + rocm_include_pkgs = [ + "rocthrust", + "rocprim", + "hipsparse", + "hipblas", + "hipblas-common", + "hipblaslt", + "hipsolver", + ] + for pkg in rocm_include_pkgs: + if pkg in self.spec: + include.extend(self.spec[pkg].headers.directories) + + # At build time, torchvision's setup imports torch; libtorch_hip.so then + # needs aotriton and hip libs at runtime. Add their lib dirs so the loader + # can resolve undefined symbols (e.g. aotriton::v2::flash::attn_bwd_fused). + for pkg in ["aotriton", "hip"]: + if pkg in self.spec: + for lib_dir in self.spec[pkg].prefix.lib: + env.prepend_path("LD_LIBRARY_PATH", lib_dir) + # CONTRIBUTING.md says to use TORCHVISION_INCLUDE and TORCHVISION_LIBRARY, but # these do not work for older releases. Build uses a mix of Spack's compiler wrapper # and the actual compiler, so this is needed to get parts of the build working. From 5ded4e016f07be66d37852adb070fb8222c9b350 Mon Sep 17 00:00:00 2001 From: renjithravindrankannath Date: Sat, 21 Feb 2026 11:16:03 -0500 Subject: [PATCH 84/84] Add prefix lib dirs when they exist so the loader can find .so files --- .../packages/py_torchvision/package.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/repos/spack_repo/builtin/packages/py_torchvision/package.py b/repos/spack_repo/builtin/packages/py_torchvision/package.py index 9370f87e0a0..011ab95c08a 100644 --- a/repos/spack_repo/builtin/packages/py_torchvision/package.py +++ b/repos/spack_repo/builtin/packages/py_torchvision/package.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import os from spack_repo.builtin.build_systems.python import PythonPackage @@ -220,11 +221,21 @@ def setup_build_environment(self, env: EnvironmentModifications) -> None: # needs aotriton and hip libs at runtime. Add their lib dirs so the loader # can resolve undefined symbols (e.g. aotriton::v2::flash::attn_bwd_fused). for pkg in ["aotriton", "hip"]: - if pkg in self.spec: - for lib_dir in self.spec[pkg].prefix.lib: + if pkg not in self.spec: + continue + try: + for lib_dir in self.spec[pkg].libs.directories: env.prepend_path("LD_LIBRARY_PATH", lib_dir) - - # CONTRIBUTING.md says to use TORCHVISION_INCLUDE and TORCHVISION_LIBRARY, but + except NoLibrariesError: + # Package may not declare 'libraries' (e.g. aotriton), so Spack + # cannot recursively locate libs. Add prefix lib dirs when they + # exist so the loader can find .so files (lib, lib64, or both). + for sub in ("lib", "lib64"): + lib_dir = os.path.join(self.spec[pkg].prefix, sub) + if os.path.isdir(lib_dir): + env.prepend_path("LD_LIBRARY_PATH", lib_dir) + + # CONTRIBUTING.md says to use TORCHVISION_INCLUDE and TORCHVISION_LIBRARY, but # these do not work for older releases. Build uses a mix of Spack's compiler wrapper # and the actual compiler, so this is needed to get parts of the build working. # See https://github.com/pytorch/vision/issues/2591