From d4a9254d764a0ff0be8514a6854afda833a268ce Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Sat, 16 Mar 2024 02:47:03 +0900 Subject: [PATCH] [SYCL][ESIMD] Use intrinsic for named_barrier_signal (#12982) We don't want to use raw_send here. I manually tested this on PVC with the required driver version. --------- Signed-off-by: Sarnie, Nick --- llvm/lib/SYCLLowerIR/CMakeLists.txt | 6 ++--- llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp | 3 +-- .../test/SYCLLowerIR/esimd_lower_nbarriers.ll | 21 ---------------- .../esimd/detail/memory_intrin.hpp | 24 +++++++------------ .../ext/intel/experimental/esimd/memory.hpp | 13 ++-------- sycl/test/esimd/nbarriers.cpp | 2 +- 6 files changed, 15 insertions(+), 54 deletions(-) delete mode 100644 llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index da38e5eaa5462..59f409533b10d 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -14,9 +14,9 @@ if (NOT TARGET LLVMGenXIntrinsics) if (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR) set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git) - # Date: Dec 14, 2023 - # Add support for XeHPCVG platform - set(LLVMGenXIntrinsics_GIT_TAG da892e1982b6c25b9a133f85b4ac97142d8a3def) + # Date: 13 Feb 2024 + # Add an intrinsic for named barrier arrive/signal operation + set(LLVMGenXIntrinsics_GIT_TAG f9c34404d0ea9abad83875a10bd48d88cea90ebd) message(STATUS "vc-intrinsics repo is missing. Will try to download it from ${LLVMGenXIntrinsics_GIT_REPO}") include(FetchContent) diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index 34376fc693e0c..27575bb643a25 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -494,8 +494,7 @@ class ESIMDIntrinDescTable { {"dpasw", {"dpasw", {a(0), a(1), a(2), t(0)}}}, {"dpasw_nosrc0", {"dpasw.nosrc0", {a(0), a(1), t(0)}}}, {"nbarrier", {"nbarrier", {a(0), a(1), a(2)}}}, - {"raw_send_nbarrier_signal", - {"raw.send.noresult", {a(0), ai1(4), a(1), a(2), a(3)}}}, + {"nbarrier_arrive", {"nbarrier.arrive", {a(0), a(1), a(2), a(3)}}}, {"lsc_load_slm", {"lsc.load.slm", {ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5), diff --git a/llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll b/llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll deleted file mode 100644 index b110c588b4e56..0000000000000 --- a/llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -; Function Attrs: convergent norecurse mustprogress -define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() !sycl_explicit_simd !3 { -entry: -; CHECK: call void @llvm.genx.nbarrier(i8 0, i8 2, i8 0) - call spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext 0, i8 zeroext 2, i8 zeroext 0) - -; CHECK: call void @llvm.genx.raw.send.noresult.i1.v8i32(i32 0, i1 true, i32 3, i32 33554436, <8 x i32> ) - call spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32 0, i32 3, i32 33554436, <8 x i32> , i16 zeroext 1) - - ret void -} -!3 = !{} - -declare dso_local spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext, i8 zeroext, i8 zeroext) local_unnamed_addr #1 -declare dso_local spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32, i32, i32, <8 x i32>, i16 zeroext) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 96780df8c6eb3..780b6fee62d91 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -41,26 +41,18 @@ __ESIMD_INTRIN void __esimd_nbarrier(uint8_t mode, uint8_t id, /// @param count - number of named barriers __ESIMD_INTRIN void __esimd_nbarrier_init(uint8_t count) __ESIMD_INTRIN_END; -/// Raw send signal to perform signal operation on named barriers +/// Perform signal operation on named barriers /// Available only on PVC -/// @tparam Ty - message element type +/// @param id - barrier id /// -/// @tparam N - message length +/// @param thread_role - thread role /// -/// @param is_sendc - is sendc +/// @param num_producers - number of producers /// -/// @param extended_descriptor - extended message descriptor -/// -/// @param descriptor - message descriptor -/// -/// @param msg_var - source operand of send message -/// -/// @param pred - predicate for enabled channels -template -__ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal( - uint32_t is_sendc, uint32_t extended_descriptor, uint32_t descriptor, - __ESIMD_DNS::vector_type_t msg_var, - uint16_t pred = 1) __ESIMD_INTRIN_END; +/// @param num_consumers - number of consumers +__ESIMD_INTRIN void +__esimd_nbarrier_arrive(uint8_t id, uint8_t thread_role, uint8_t num_producers, + uint8_t num_consumers) __ESIMD_INTRIN_END; /// 2D USM pointer block load. /// Supported platforms: PVC diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index 085cc33035fe2..c1e64654a8e2d 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -393,19 +393,10 @@ __ESIMD_API void named_barrier_signal(uint8_t barrier_id, uint8_t producer_consumer_mode, uint32_t num_producers, uint32_t num_consumers) { - constexpr uint32_t gateway = 3; - constexpr uint32_t barrier = 4; - constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register - 0 << 12 | // Fence Data Ports: No fence - barrier; // Barrier subfunction - - __ESIMD_DNS::vector_type_t payload = 0; - payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 | - producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0; __esimd_fence(__ESIMD_NS::fence_mask::global_coherent_fence | __ESIMD_NS::fence_mask::local_barrier); - __esimd_raw_send_nbarrier_signal( - 0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/); + __esimd_nbarrier_arrive(barrier_id, producer_consumer_mode, num_producers, + num_consumers); } /// Create explicit scoreboard dependency to avoid device code motion diff --git a/sycl/test/esimd/nbarriers.cpp b/sycl/test/esimd/nbarriers.cpp index 82374f4ae96f8..fced9a6611007 100644 --- a/sycl/test/esimd/nbarriers.cpp +++ b/sycl/test/esimd/nbarriers.cpp @@ -16,7 +16,7 @@ void caller(int x) { named_barrier_init<7>(); named_barrier_wait(2); // CHECK: call spir_func void @_Z13__esimd_fenceh(i8 noundef zeroext 33) - // CHECK-NEXT: call spir_func void @_Z32__esimd_raw_send_nbarrier_signal{{.*}} + // CHECK-NEXT: call spir_func void @_Z23__esimd_nbarrier_arrive{{.*}} named_barrier_signal(0, 0, 4, 4); }); }