Skip to content

Commit

Permalink
[SYCL][ESIMD] Require inlining of some noinline functions due to VC l…
Browse files Browse the repository at this point in the history
…imitation (#12440)

We need to inline some `nolinine` functions because VC doesn't support
debugging/`O0`/`-g` in order to at least make user code do the right
thing. Otherwise, we get wrong answers or GPU hangs.

This change fixes 4 `-O0`/`-fno-inline-functions` test failures.

---------

Signed-off-by: Nick Sarnie <nick.sarnie@intel.com>
Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
  • Loading branch information
sarnex authored Jan 20, 2024
1 parent 7b62154 commit 89327e0
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 4 deletions.
38 changes: 37 additions & 1 deletion llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1737,6 +1737,42 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) {
F.addFnAttr(llvm::Attribute::NoInline);
};

bool ModuleContainsGenXVolatile =
std::any_of(M.global_begin(), M.global_end(), [](const auto &Global) {
return Global.hasAttribute("genx_volatile");
});

auto requiresInlining = [=](Function &F) {
// If there are any genx_volatile globals in the module, inline
// noinline functions because load/store semantics are not valid for
// these globals and we cannot know for sure if the load/store target
// is one of these globals without inlining.
if (ModuleContainsGenXVolatile)
return true;

// Otherwise, only inline esimd namespace functions.
StringRef MangledName = F.getName();
id::ManglingParser<SimpleAllocator> Parser(MangledName.begin(),
MangledName.end());
id::Node *AST = Parser.parse();
if (!AST || AST->getKind() != id::Node::KFunctionEncoding)
return false;

auto *FE = static_cast<id::FunctionEncoding *>(AST);
const id::Node *NameNode = FE->getName();
if (!NameNode)
return false;

if (NameNode->getKind() == id::Node::KLocalName)
return false;

id::OutputBuffer NameBuf;
NameNode->print(NameBuf);
StringRef Name(NameBuf.getBuffer(), NameBuf.getCurrentPosition());

return Name.starts_with("sycl::_V1::ext::intel::esimd::") ||
Name.starts_with("sycl::_V1::ext::intel::experimental::esimd::");
};
bool NeedInline = false;
for (auto &F : M) {
// If some function already has 'alwaysinline' attribute, then request
Expand Down Expand Up @@ -1773,7 +1809,7 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) {
// it had noinline or VCStackCall attrubute.
// This code migrated to here without changes, but... VC BE does support
// the calls of spir_func these days, so this code needs re-visiting.
if (!F.hasFnAttribute(Attribute::NoInline))
if (!F.hasFnAttribute(Attribute::NoInline) || requiresInlining(F))
NeedInline |= markAlwaysInlined(F);

if (!isSlmInit(F))
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/SYCLLowerIR/ESIMD/assert_with_volatile_global.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; This test locks down that assert functions are still noinline even if a
; genx_volatile global is present.
;
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

%"class.sycl::_V1::ext::intel::esimd::simd" = type { %"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" }
%"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" = type { <16 x float> }

@va = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0

define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) {
; CHECK: define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) #[[#ATTR:]] {
ret void
}

define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) {
; CHECK: define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) #[[#ATTR]] {
ret void
}

; CHECK: attributes #[[#ATTR]] = { noinline }
attributes #0 = { "genx_byte_offset"="192" "genx_volatile" }
!0 = !{}
16 changes: 16 additions & 0 deletions llvm/test/SYCLLowerIR/ESIMD/force_inline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,22 @@ define dso_local spir_kernel void @KERNEL(ptr addrspace(4) %ptr) !sycl_explicit_
ret void
}

; Function with "noinline" attribute must be marked with "alwaysinline" if it is an ESIMD namespace function
define dso_local spir_func void @_ZNK4sycl3_V13ext5intel5esimd6detail13simd_obj_implIiLi16ENS3_4simdIiLi16EEEvE4dataEv(ptr addrspace(4) %ptr) #1 {
; CHECK: define dso_local spir_func void @_ZNK4sycl3_V13ext5intel5esimd6detail13simd_obj_implIiLi16ENS3_4simdIiLi16EEEvE4dataEv(ptr addrspace(4) %ptr) #[[ATTRS1]] {
ret void
}

; assert functions must not be marked with "alwaysinline"
define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) {
; CHECK: define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) #[[ATTRS3]] {
ret void
}

define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) {
; CHECK: define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) #[[ATTRS3]] {
ret void
}

attributes #0 = { "VCStackCall" }
attributes #1 = { noinline }
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/SYCLLowerIR/ESIMD/lower_global_stores.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; This test checks whether global stores are converted to vstores
;
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck --implicit-check-not=noinline %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
Expand All @@ -11,7 +11,8 @@ target triple = "spir64-unknown-unknown"
@va = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0
@vb = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0

define weak_odr dso_local spir_kernel void @foo() {
define weak_odr dso_local spir_kernel void @foo() #1 {
; CHECK: define weak_odr dso_local spir_kernel void @foo() #[[#ATTR:]] {
%1 = call <16 x float> asm "", "=rw"()
; CHECK: call void @llvm.genx.vstore.v16f32.p0(<16 x float> %1, ptr @va)
store <16 x float> %1, ptr @va
Expand All @@ -21,4 +22,5 @@ ret void
}

attributes #0 = { "genx_byte_offset"="0" "genx_volatile" }
attributes #1 = { "" }
; CHECK: attributes #[[#ATTR]] = { alwaysinline }
attributes #1 = { noinline }

0 comments on commit 89327e0

Please sign in to comment.