From b02d5f83ae90cc2c7caa1d2f0dd171b46c227133 Mon Sep 17 00:00:00 2001
From: Xiang Gao
Date: Tue, 28 Jan 2025 17:37:36 -0800
Subject: [PATCH] Use arch-specific PTX on Blackwell

---
 csrc/runtime/compiled_kernel.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/csrc/runtime/compiled_kernel.cpp b/csrc/runtime/compiled_kernel.cpp
index b41103c3957..000bdfa7f75 100644
--- a/csrc/runtime/compiled_kernel.cpp
+++ b/csrc/runtime/compiled_kernel.cpp
@@ -520,8 +520,8 @@ void fillCompileOptions(
   std::string compute = std::string("--gpu-architecture=") +
       (compile_to_sass ? "sm_" : "compute_") + std::to_string(major) +
       std::to_string(minor);
-  if (major == 9) {
-    // Hopper MMAs require 90a instead of 90
+  if (major >= 9) {
+    // Use 90a and 100a so that arch-specific PTX is available
     compute += "a";
   }
   nvrtc_compile_driver.setOption(compute);