diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_add.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_add.cl
index 8ab2604cde0c4..c1d69efc5b477 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_add.cl
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_add.cl
@@ -71,6 +71,13 @@ Memory order is stored in the lowest 5 bits */
             ADDR_SPACE, ADDR_SPACE_NV) \
       } \
       break; \
+    case SequentiallyConsistent: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_FENCE_SC_SM70() \
+        __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
+                                     ADDR_SPACE_NV, _acq_rel) \
+        break; \
+      } \
     } \
     __builtin_trap(); \
     __builtin_unreachable(); \
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl
index fceeda0f60361..19d4dca833fef 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <atomic_helpers.h>
 #include <spirv/spirv.h>
 #include <spirv/spirv_types.h>
 
@@ -120,6 +121,13 @@ Memory order is stored in the lowest 5 bits */ \
           TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
       } \
       break; \
+    case SequentiallyConsistent: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_FENCE_SC_SM70() \
+        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                         ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
+        break; \
+      } \
     } \
     __builtin_trap(); \
     __builtin_unreachable(); \
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h
index 56c20cece7935..ecffd9e82d2fe 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h
@@ -72,6 +72,15 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
     } \
   }
 
+#define __CLC_NVVM_FENCE_SC_SM70() \
+  if (scope == CrossDevice) { \
+    __asm__ __volatile__("fence.sc.sys;"); \
+  } else if (scope == Device) { \
+    __asm__ __volatile__("fence.sc.gpu;"); \
+  } else { \
+    __asm__ __volatile__("fence.sc.cta;"); \
+  }
+
 #define __CLC_NVVM_ATOMIC_IMPL( \
     TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, NAME_MANGLED, \
     ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV, SUBSTITUTION) \
@@ -117,6 +126,13 @@ Memory order is stored in the lowest 5 bits */ \
                                   OP, ADDR_SPACE, ADDR_SPACE_NV) \
       } \
       break; \
+    case SequentiallyConsistent: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_FENCE_SC_SM70() \
+        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                     ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
+        break; \
+      } \
     } \
     __builtin_trap(); \
     __builtin_unreachable(); \
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl
index 54483b8c5ec25..60311a978762d 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <atomic_helpers.h>
 #include <spirv/spirv.h>
 #include <spirv/spirv_types.h>
 
@@ -53,6 +54,12 @@ Memory order is stored in the lowest 5 bits */ \
       case Acquire: \
         __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
                                           ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
+        break; \
+      case SequentiallyConsistent: \
+        __CLC_NVVM_FENCE_SC_SM70() \
+        __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
+                                          ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
+        break; \
     } \
   } else { \
     TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl
index 1aaf1c8ab8499..b2e23cd76eac2 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <atomic_helpers.h>
 #include <spirv/spirv.h>
 #include <spirv/spirv_types.h>
 
@@ -54,6 +55,13 @@ Memory order is stored in the lowest 5 bits */ \
         __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
                                            ADDR_SPACE, ADDR_SPACE_NV, \
                                            _release) \
+        break; \
+      case SequentiallyConsistent: \
+        __CLC_NVVM_FENCE_SC_SM70() \
+        __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
+                                           ADDR_SPACE, ADDR_SPACE_NV, \
+                                           _release) \
+        break; \
     } \
   } else { \
     switch (order) { \
diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 6b02bd454e7b9..319763ca97b99 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,13 +57,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 09be0881b727fadb1c04b38c00d2562d7dc6875f
-  # Merge: bb589ca8 e9f855d4
+  # commit 29ee45c4451a682f744146cc9dbeb2617ecdd6b3
+  # Merge: db4b0c14 4f5d005a
   # Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
-  # Date:   Thu Mar 14 22:10:28 2024 +0000
-  # Merge pull request #1429 from nrspruit/l0_p2p_device_query
-  # [L0] Support for urUsmP2PPeerAccessGetInfoExp to query p2p access info
-  set(UNIFIED_RUNTIME_TAG 09be0881b727fadb1c04b38c00d2562d7dc6875f)
+  # Date:   Mon Mar 18 12:14:26 2024 +0000
+  # Merge pull request #1291 from JackAKirk/cuda-seq-cst-b
+  # [CUDA] Report that devices with cc >= sm_70 support seq_cst
+  set(UNIFIED_RUNTIME_TAG 29ee45c4451a682f744146cc9dbeb2617ecdd6b3)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")