From 85fd74dd32cd10c9ff6bfa73077b7e693a5e22dd Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 24 Jan 2025 03:38:29 -0500
Subject: [PATCH] Add cuda 12.8 support (#2551)

CUDA 12.8 introduces sm_120, which requires a reduced number of threads per SM.

We also need to pass `-static-global-template-stub=false` when building with CUDA 12.8 or newer, because we currently violate the CUDA kernel ODR rules.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Corey J. Nolet (https://github.com/cjnolet)
  - https://github.com/jakirkham

URL: https://github.com/rapidsai/raft/pull/2551
---
 cpp/cmake/modules/ConfigureCUDA.cmake            | 8 +++++++-
 cpp/include/raft/neighbors/detail/nn_descent.cuh | 5 +++--
 2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
index 25b9b0ddf8..fbf4428650 100644
--- a/cpp/cmake/modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -29,6 +29,12 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
     list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings)
   endif()
+
+  # Allow invalid CUDA kernels in the short term
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0)
+    list(APPEND RAFT_CUDA_FLAGS -static-global-template-stub=false)
+  endif()
+
 endif()
 
 if(CUDA_LOG_COMPILE_TIME)
diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh
index 02610f9afb..64e4a3ea7a 100644
--- a/cpp/include/raft/neighbors/detail/nn_descent.cuh
+++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -709,7 +709,8 @@ template <typename Index_t,
           typename epilogue_op = DistEpilogue<Index_t, DistData_t>>
 RAFT_KERNEL
 #ifdef __CUDA_ARCH__
-#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890)
+#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) || \
+  (__CUDA_ARCH__) == 1200
 __launch_bounds__(BLOCK_SIZE)
 #else
 __launch_bounds__(BLOCK_SIZE, 4)