From 89327e0a96e9b480e807211be79749741eb78fc1 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Sat, 20 Jan 2024 02:02:08 +0000 Subject: [PATCH] [SYCL][ESIMD] Require inlining of some noinline functions due to VC limitation (#12440) We need to inline some `nolinine` functions because VC doesn't support debugging/`O0`/`-g` in order to at least make user code do the right thing. Otherwise, we get wrong answers or GPU hangs. This change fixes 4 `-O0`/`-fno-inline-functions` test failures. --------- Signed-off-by: Nick Sarnie Signed-off-by: Sarnie, Nick --- llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp | 38 ++++++++++++++++++- .../ESIMD/assert_with_volatile_global.ll | 26 +++++++++++++ llvm/test/SYCLLowerIR/ESIMD/force_inline.ll | 16 ++++++++ .../SYCLLowerIR/ESIMD/lower_global_stores.ll | 8 ++-- 4 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 llvm/test/SYCLLowerIR/ESIMD/assert_with_volatile_global.ll diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index 0f767e434d918..6b8d0cf56b77f 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -1737,6 +1737,42 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) { F.addFnAttr(llvm::Attribute::NoInline); }; + bool ModuleContainsGenXVolatile = + std::any_of(M.global_begin(), M.global_end(), [](const auto &Global) { + return Global.hasAttribute("genx_volatile"); + }); + + auto requiresInlining = [=](Function &F) { + // If there are any genx_volatile globals in the module, inline + // noinline functions because load/store semantics are not valid for + // these globals and we cannot know for sure if the load/store target + // is one of these globals without inlining. + if (ModuleContainsGenXVolatile) + return true; + + // Otherwise, only inline esimd namespace functions. + StringRef MangledName = F.getName(); + id::ManglingParser Parser(MangledName.begin(), + MangledName.end()); + id::Node *AST = Parser.parse(); + if (!AST || AST->getKind() != id::Node::KFunctionEncoding) + return false; + + auto *FE = static_cast(AST); + const id::Node *NameNode = FE->getName(); + if (!NameNode) + return false; + + if (NameNode->getKind() == id::Node::KLocalName) + return false; + + id::OutputBuffer NameBuf; + NameNode->print(NameBuf); + StringRef Name(NameBuf.getBuffer(), NameBuf.getCurrentPosition()); + + return Name.starts_with("sycl::_V1::ext::intel::esimd::") || + Name.starts_with("sycl::_V1::ext::intel::experimental::esimd::"); + }; bool NeedInline = false; for (auto &F : M) { // If some function already has 'alwaysinline' attribute, then request @@ -1773,7 +1809,7 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) { // it had noinline or VCStackCall attrubute. // This code migrated to here without changes, but... VC BE does support // the calls of spir_func these days, so this code needs re-visiting. - if (!F.hasFnAttribute(Attribute::NoInline)) + if (!F.hasFnAttribute(Attribute::NoInline) || requiresInlining(F)) NeedInline |= markAlwaysInlined(F); if (!isSlmInit(F)) diff --git a/llvm/test/SYCLLowerIR/ESIMD/assert_with_volatile_global.ll b/llvm/test/SYCLLowerIR/ESIMD/assert_with_volatile_global.ll new file mode 100644 index 0000000000000..ab4478267b54f --- /dev/null +++ b/llvm/test/SYCLLowerIR/ESIMD/assert_with_volatile_global.ll @@ -0,0 +1,26 @@ +; This test locks down that assert functions are still noinline even if a +; genx_volatile global is present. +; +; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::ext::intel::esimd::simd" = type { %"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" } +%"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" = type { <16 x float> } + +@va = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0 + +define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) { +; CHECK: define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) #[[#ATTR:]] { + ret void +} + +define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) { +; CHECK: define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) #[[#ATTR]] { + ret void +} + +; CHECK: attributes #[[#ATTR]] = { noinline } +attributes #0 = { "genx_byte_offset"="192" "genx_volatile" } +!0 = !{} diff --git a/llvm/test/SYCLLowerIR/ESIMD/force_inline.ll b/llvm/test/SYCLLowerIR/ESIMD/force_inline.ll index e48ec20817f70..04b9d39a78434 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/force_inline.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/force_inline.ll @@ -37,6 +37,22 @@ define dso_local spir_kernel void @KERNEL(ptr addrspace(4) %ptr) !sycl_explicit_ ret void } +; Function with "noinline" attribute must be marked with "alwaysinline" if it is an ESIMD namespace function +define dso_local spir_func void @_ZNK4sycl3_V13ext5intel5esimd6detail13simd_obj_implIiLi16ENS3_4simdIiLi16EEEvE4dataEv(ptr addrspace(4) %ptr) #1 { +; CHECK: define dso_local spir_func void @_ZNK4sycl3_V13ext5intel5esimd6detail13simd_obj_implIiLi16ENS3_4simdIiLi16EEEvE4dataEv(ptr addrspace(4) %ptr) #[[ATTRS1]] { +ret void +} + +; assert functions must not be marked with "alwaysinline" +define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) { +; CHECK: define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) #[[ATTRS3]] { + ret void +} + +define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) { +; CHECK: define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) #[[ATTRS3]] { + ret void +} attributes #0 = { "VCStackCall" } attributes #1 = { noinline } diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_global_stores.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_global_stores.ll index 3b4dd47ad6ee0..0dce22309f257 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_global_stores.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_global_stores.ll @@ -1,6 +1,6 @@ ; This test checks whether global stores are converted to vstores ; -; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s +; RUN: opt < %s -passes=LowerESIMD -S | FileCheck --implicit-check-not=noinline %s target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" @@ -11,7 +11,8 @@ target triple = "spir64-unknown-unknown" @va = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0 @vb = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0 -define weak_odr dso_local spir_kernel void @foo() { +define weak_odr dso_local spir_kernel void @foo() #1 { +; CHECK: define weak_odr dso_local spir_kernel void @foo() #[[#ATTR:]] { %1 = call <16 x float> asm "", "=rw"() ; CHECK: call void @llvm.genx.vstore.v16f32.p0(<16 x float> %1, ptr @va) store <16 x float> %1, ptr @va @@ -21,4 +22,5 @@ ret void } attributes #0 = { "genx_byte_offset"="0" "genx_volatile" } -attributes #1 = { "" } \ No newline at end of file +; CHECK: attributes #[[#ATTR]] = { alwaysinline } +attributes #1 = { noinline }