From 85fd74dd32cd10c9ff6bfa73077b7e693a5e22dd Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 24 Jan 2025 03:38:29 -0500 Subject: [PATCH] Add cuda 12.8 support (#2551) CUDA 12.8 introduces sm_120, which requires a reduced number of threads per SM. We also need to pass `-static-global-template-stub=false` when building with CUDA 12.8 or newer, as we currently violate the CUDA ODR rules for kernels. Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Corey J. Nolet (https://github.com/cjnolet) - https://github.com/jakirkham URL: https://github.com/rapidsai/raft/pull/2551 --- cpp/cmake/modules/ConfigureCUDA.cmake | 8 +++++++- cpp/include/raft/neighbors/detail/nn_descent.cuh | 5 +++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 25b9b0ddf8..fbf4428650 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -29,6 +29,12 @@ if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) endif() + + # Allow invalid CUDA kernels in the short term + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0) + list(APPEND RAFT_CUDA_FLAGS -static-global-template-stub=false) + endif() + endif() if(CUDA_LOG_COMPILE_TIME) diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 02610f9afb..64e4a3ea7a 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -709,7 +709,8 @@ template > RAFT_KERNEL #ifdef __CUDA_ARCH__ -#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) +#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) || \ + (__CUDA_ARCH__) == 1200 __launch_bounds__(BLOCK_SIZE) #else __launch_bounds__(BLOCK_SIZE, 4)