Skip to content

Commit

Permalink
[SYCL][SCLA] Add basic sycl_ext_oneapi_private_alloca functionality (
Browse files Browse the repository at this point in the history
…#12966)

After a105055 implementing CodeGen capabilities for
`sycl_ext_oneapi_private_alloca`, this patch handles the generated
intrinsic in `sycl-post-link` for targets with native specialization
constants support.

Headers for the new extension are also added, as well as a feature test
macro.

`multi_ptr` definitions in the SYCL headers are annotated with the
`__sycl_detail__::sycl_type` to be detected by the frontend.

---------

Signed-off-by: Victor Perez <victor.perez@codeplay.com>
  • Loading branch information
victor-eds authored Mar 18, 2024
1 parent cf402b8 commit 4ff8fcf
Show file tree
Hide file tree
Showing 15 changed files with 400 additions and 13 deletions.
19 changes: 19 additions & 0 deletions llvm/include/llvm/IR/IntrinsicInst.h
Original file line number Diff line number Diff line change
Expand Up @@ -1809,6 +1809,25 @@ class ConvergenceControlInst : public IntrinsicInst {
}
};

/// This represents the llvm.sycl.alloca intrinsic.
class SYCLAllocaInst : public IntrinsicInst {
public:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::sycl_alloca;
}

static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}

unsigned getAddressSpace() const;
Value *getSizeSymbolicID() const;
Value *getSizeDefaultValue() const;
Value *getRTBuffer() const;
Type *getAllocatedType() const;
Align getAlign() const;
};

} // end namespace llvm

#endif // LLVM_IR_INTRINSICINST_H
18 changes: 18 additions & 0 deletions llvm/lib/IR/IntrinsicInst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -980,3 +980,21 @@ Value *GCRelocateInst::getDerivedPtr() const {
return *(Opt->Inputs.begin() + getDerivedPtrIndex());
return *(GCInst->arg_begin() + getDerivedPtrIndex());
}

unsigned SYCLAllocaInst::getAddressSpace() const {
return getType()->getPointerAddressSpace();
}

Value *SYCLAllocaInst::getSizeSymbolicID() const { return getArgOperand(0); }

Value *SYCLAllocaInst::getSizeDefaultValue() const { return getArgOperand(1); }

Value *SYCLAllocaInst::getRTBuffer() const { return getArgOperand(2); }

Type *SYCLAllocaInst::getAllocatedType() const {
return getFunctionType()->getFunctionParamType(3);
}

Align SYCLAllocaInst::getAlign() const {
return cast<ConstantInt>(getArgOperand(4))->getAlignValue();
}
60 changes: 60 additions & 0 deletions llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; RUN: sycl-post-link -spec-const=native < %s -S -o %t.table
; RUN: FileCheck %s -check-prefixes=CHECK-RT < %t_0.ll
; RUN: FileCheck %s --check-prefixes=CHECK-PROPS < %t_0.prop

; This test checks that the post link tool is able to correctly transform
; SYCL alloca intrinsics in SPIR-V devices.

%"class.sycl::_V1::specialization_id" = type { i64 }
%"class.sycl::_V1::specialization_id.0" = type { i32 }
%"class.sycl::_V1::specialization_id.1" = type { i16 }
%my_range = type { ptr addrspace(4), ptr addrspace(4) }

@size_i64 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id" { i64 10 }, align 8
@size_i32 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.0" { i32 120 }, align 4
@size_i16 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.1" { i16 1 }, align 2

; Check that the following globals are preserved: even though they are not used
; in the module anymore, they could still be referenced by debug info metadata
; (specialization_id objects are used as template arguments in SYCL
; specialization constant APIs).
; CHECK: @size_i64
; CHECK: @size_i32
; CHECK: @size_i16

@size_i64_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i64EE\00", align 1
@size_i32_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i32EE\00", align 1
@size_i16_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i16EE\00", align 1

; CHECK-LABEL: define dso_local void @private_alloca
define dso_local void @private_alloca() {
; CHECK-RT: [[LENGTH:%.*]] = call i32 @_Z20__spirv_SpecConstantii(i32 1, i32 120)
; CHECK-RT: {{.*}} = alloca double, i32 [[LENGTH]], align 8
call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr @size_i32_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i32 to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8)
; CHECK-RT: [[LENGTH:%.*]] = call i64 @_Z20__spirv_SpecConstantix(i32 0, i64 10)
; CHECK-RT: {{.*}} = alloca float, i64 [[LENGTH]], align 8
call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4) addrspacecast (ptr @size_i64_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i64 to ptr addrspace(4)), ptr addrspace(4) null, float 0.000000e+00, i64 8)
call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_my_range(ptr addrspace(4) addrspacecast (ptr @size_i16_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i16 to ptr addrspace(4)), ptr addrspace(4) null, %my_range zeroinitializer, i64 64)
ret void
}

declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), float, i64)
declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), double, i64)
declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_my_range(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), %my_range, i64)

; CHECK-RT: !sycl.specialization-constants = !{![[#ID0:]], ![[#ID1:]], ![[#ID2:]]}
; CHECK-RT: !sycl.specialization-constants-default-values = !{![[#DEF0:]], ![[#DEF1:]], ![[#DEF2:]]}

; CHECK-RT: ![[#ID0]] = !{!"_ZTS14name_generatorIL_Z8size_i64EE", i32 0, i32 0, i32 8}
; CHECK-RT: ![[#ID1]] = !{!"_ZTS14name_generatorIL_Z8size_i32EE", i32 1, i32 0, i32 4}
; CHECK-RT: ![[#ID2]] = !{!"_ZTS14name_generatorIL_Z8size_i16EE", i32 2, i32 0, i32 2}
; CHECK-RT: ![[#DEF0]] = !{i64 10}
; CHECK-RT: ![[#DEF1]] = !{i32 120}
; CHECK-RT: ![[#DEF2]] = !{i16 1}

; CHECK-PROPS: [SYCL/specialization constants]
; CHECK-PROPS: _ZTS14name_generatorIL_Z8size_i64EE=2|
; CHECK-PROPS: _ZTS14name_generatorIL_Z8size_i32EE=2|
; CHECK-PROPS: _ZTS14name_generatorIL_Z8size_i16EE=2|
; CHECK-PROPS: [SYCL/specialization constants default values]
; CHECK-PROPS: all=2|
56 changes: 46 additions & 10 deletions llvm/tools/sycl-post-link/SpecConstants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"

#include <vector>
Expand Down Expand Up @@ -818,8 +819,11 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,
if (!F.isDeclaration())
continue;

const bool IsSYCLAlloca = F.getIntrinsicID() == Intrinsic::sycl_alloca;

if (!F.getName().starts_with(SYCL_GET_SCALAR_2020_SPEC_CONST_VAL) &&
!F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL))
!F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL) &&
!IsSYCLAlloca)
continue;

SmallVector<CallInst *, 32> SCIntrCalls;
Expand All @@ -838,21 +842,39 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,

SmallVector<Instruction *, 3> DelInsts;
DelInsts.push_back(CI);
Type *SCTy = CI->getType();
unsigned NameArgNo = 0;
Function *Callee = CI->getCalledFunction();
assert(Callee && "Failed to get spec constant call");
bool HasSretParameter = Callee->hasStructRetAttr();

// Structs are returned via 'sret' arguments if they are larger than 64b
if (HasSretParameter) {
// Get structure type stored in an argument annotated with 'sret'
// parameter attribute and skip it.
SCTy = Callee->getParamStructRetType(NameArgNo++);
}
bool HasSretParameter = Callee->hasStructRetAttr();
assert(!(HasSretParameter && IsSYCLAlloca) &&
"'llvm.sycl.alloca' returns a pointer");
// Skip 'sret' parameter.
unsigned NameArgNo = HasSretParameter ? 1 : 0;

StringRef SymID = getStringLiteralArg(CI, NameArgNo, DelInsts);
Value *Replacement = nullptr;

Constant *DefaultValue = getSpecConstInitializerFromCI(CI, NameArgNo + 1);
Type *SCTy;
if (HasSretParameter) {
// Specialization constant type is given by the 'sret' parameter.
SCTy = Callee->getParamStructRetType(0);
} else if (IsSYCLAlloca) {
// 'llvm.sycl.alloca' returns a pointer, so we need to take the
// specialization constant type from the default value. At this stage,
// we will have lost the original scalar representation of the type, so
// we have to take the in-memory representation. This is only relevant
// when a 'bool' ('i1' scalar representation and 'i8' in-memory
// representation) specialization constant is used as size. In that
// case, for a value of 'true' (the only legal value), the default value
// will be 1 ('i8'), thus keeping the original semantics.
SCTy = DefaultValue->getType();
} else {
// Specialization constant type is the same as the one returned by the
// function in the general case.
SCTy = CI->getType();
}

bool IsNewSpecConstant = false;
unsigned Padding = 0;
Expand All @@ -872,6 +894,17 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,
// 3. Transform to spirv intrinsic _Z*__spirv_SpecConstant* or
// _Z*__spirv_SpecConstantComposite
Replacement = emitSpecConstantRecursive(SCTy, CI, IDs, DefaultValue);
if (IsSYCLAlloca) {
// In case this is a 'sycl.llvm.alloca' intrinsic, use the emitted
// specialization constant as the allocation size.
auto *Intr = cast<SYCLAllocaInst>(CI);
Value *ArraySize = Replacement;
assert(ArraySize->getType()->isIntegerTy() &&
"Expecting integer type");
Replacement =
new AllocaInst(Intr->getAllocatedType(), Intr->getAddressSpace(),
ArraySize, Intr->getAlign(), "alloca", CI);
}
if (IsNewSpecConstant) {
// emitSpecConstantRecursive might emit more than one spec constant
// (because of composite types) and therefore, we need to adjust
Expand All @@ -884,6 +917,8 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,
M, SymID, SCTy, IDs, /* is native spec constant */ true);
}
} else if (Mode == HandlingMode::emulation) {
assert(!IsSYCLAlloca && "sycl_ext_oneapi_private_alloca not yet "
"supported in emulation mode");
// 2a. Spec constant will be passed as kernel argument;

// Replace it with a load from the pointer to the specialization
Expand Down Expand Up @@ -1043,7 +1078,8 @@ bool SpecConstantsPass::collectSpecConstantDefaultValuesMetadata(
bool llvm::checkModuleContainsSpecConsts(const Module &M) {
for (const Function &F : M.functions()) {
if (F.getName().starts_with(SYCL_GET_SCALAR_2020_SPEC_CONST_VAL) ||
F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL))
F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL) ||
F.getIntrinsicID() == llvm::Intrinsic::sycl_alloca)
return true;
}

Expand Down
6 changes: 6 additions & 0 deletions sycl/include/sycl/detail/defines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@
#else
#define __SYCL_TYPE(x)
#endif

#if __has_cpp_attribute(clang::builtin_alias)
#define __SYCL_BUILTIN_ALIAS(x) [[clang::builtin_alias(x)]]
#else
#define __SYCL_BUILTIN_ALIAS(x)
#endif
51 changes: 51 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/alloca.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//==--- alloca.hpp --- SYCL extension for private memory allocations--------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include "sycl/exception.hpp"
#include "sycl/kernel_handler.hpp"
#include "sycl/multi_ptr.hpp"

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

#ifdef __SYCL_DEVICE_ONLY__

// On the device, this is an alias to __builtin_intel_sycl_alloca.

/// Function allocating and returning a pointer to an unitialized region of
/// memory capable of hosting `kh.get_specialization_constant<SizeSpecName>()`
/// elements of type \tp ElementType. The pointer will be a `sycl::private_ptr`
/// and will or will not be decorated depending on \tp DecorateAddres.
///
/// On the host, this function simply throws, as this is not supported there.
///
/// See sycl_ext_oneapi_private_alloca.
template <typename ElementType, auto &SizeSpecName,
access::decorated DecorateAddress>
__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca)
private_ptr<ElementType, DecorateAddress> private_alloca(kernel_handler &kh);

#else

// On the host, throw, this is not supported.
template <typename ElementType, auto &SizeSpecName,
access::decorated DecorateAddress>
private_ptr<ElementType, DecorateAddress> private_alloca(kernel_handler &kh) {
throw feature_not_supported("sycl::ext::oneapi::experimental::private_alloca "
"is not supported on host",
PI_ERROR_INVALID_OPERATION);
}

#endif // __SYCL_DEVICE_ONLY__

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
6 changes: 3 additions & 3 deletions sycl/include/sycl/multi_ptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ template <typename dataT, int dimensions> class local_accessor;
// should be removed.
template <typename ElementType, access::address_space Space,
access::decorated DecorateAddress = access::decorated::legacy>
class multi_ptr {
class __SYCL_TYPE(multi_ptr) multi_ptr {
private:
using decorated_type =
typename detail::DecoratedType<ElementType, Space>::type;
Expand Down Expand Up @@ -444,7 +444,7 @@ class multi_ptr {

/// Specialization of multi_ptr for const void.
template <access::address_space Space, access::decorated DecorateAddress>
class multi_ptr<const void, Space, DecorateAddress> {
class __SYCL_TYPE(multi_ptr) multi_ptr<const void, Space, DecorateAddress> {
private:
using decorated_type =
typename detail::DecoratedType<const void, Space>::type;
Expand Down Expand Up @@ -592,7 +592,7 @@ class multi_ptr<const void, Space, DecorateAddress> {

// Specialization of multi_ptr for void.
template <access::address_space Space, access::decorated DecorateAddress>
class multi_ptr<void, Space, DecorateAddress> {
class __SYCL_TYPE(multi_ptr) multi_ptr<void, Space, DecorateAddress> {
private:
using decorated_type = typename detail::DecoratedType<void, Space>::type;

Expand Down
1 change: 1 addition & 0 deletions sycl/source/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ inline namespace _V1 {
#define SYCL_EXT_ONEAPI_IN_ORDER_QUEUE_EVENTS 1
#define SYCL_EXT_INTEL_MATRIX 1
#define SYCL_EXT_INTEL_FPGA_TASK_SEQUENCE 1
#define SYCL_EXT_ONEAPI_PRIVATE_ALLOCA 1

#ifndef __has_include
#define __has_include(x) 0
Expand Down
55 changes: 55 additions & 0 deletions sycl/test-e2e/PrivateAlloca/Inputs/private_alloca_test.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#pragma once

// Template for private alloca tests.

#include <sycl/sycl.hpp>

#include <sycl/ext/oneapi/experimental/alloca.hpp>

template <typename ElementType, typename SizeType,
sycl::access::decorated DecorateAddress>
class Kernel;

template <typename ElementType, auto &Size,
sycl::access::decorated DecorateAddress>
void test() {
std::size_t N;

std::cin >> N;

std::vector<std::size_t> v(N);
{
sycl::queue q;
sycl::buffer<std::size_t> b(v);
q.submit([&](sycl::handler &cgh) {
sycl::accessor acc(b, cgh, sycl::write_only, sycl::no_init);
cgh.set_specialization_constant<Size>(N);
using spec_const_type = std::remove_reference_t<decltype(Size)>;
using size_type = typename spec_const_type::value_type;
cgh.single_task<Kernel<ElementType, size_type, DecorateAddress>>(
[=](sycl::kernel_handler h) {
auto ptr = sycl::ext::oneapi::experimental::private_alloca<
ElementType, Size, DecorateAddress>(h);
const std::size_t M = h.get_specialization_constant<Size>();
ptr[0] = static_cast<ElementType>(M);
ElementType value{1};
for (auto begin = ptr.get() + 1, end = ptr.get() + M; begin < end;
++begin, ++value) {
*begin = value;
}
auto accBegin = acc.begin();
for (auto begin = ptr.get(), end = ptr.get() + M; begin < end;
++begin, ++accBegin) {
*accBegin = *begin;
}
});
});
q.wait_and_throw();
}
assert(static_cast<std::size_t>(v.front()) == N &&
"Wrong private alloca length reported");
for (std::size_t i = 1; i < N; ++i) {
assert(static_cast<std::size_t>(v[i]) == i &&
"Wrong value in copied-back sequence");
}
}
12 changes: 12 additions & 0 deletions sycl/test-e2e/PrivateAlloca/private_alloca_bool_size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// RUN: %{build} -w -o %t.out
// RUN: echo 1 | %{run} %t.out
// UNSUPPORTED: cuda || hip

// Test checking size of 'bool' type. This is not expected to be ever used, but,
// as 'bool' is an integral type, it is a possible scenario.

#include "Inputs/private_alloca_test.hpp"

constexpr sycl::specialization_id<bool> size(true);

int main() { test<int, size, sycl::access::decorated::legacy>(); }
15 changes: 15 additions & 0 deletions sycl/test-e2e/PrivateAlloca/private_alloca_decorated.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// RUN: %{build} -o %t.out
// RUN: echo 1 | %{run} %t.out
// RUN: echo 10 | %{run} %t.out
// RUN: echo 20 | %{run} %t.out
// RUN: echo 30 | %{run} %t.out
// UNSUPPORTED: cuda || hip

// Simple test filling a SYCL private alloca and copying it back to an output
// accessor using a decorated multi_ptr.

#include "Inputs/private_alloca_test.hpp"

constexpr sycl::specialization_id<int> size(10);

int main() { test<float, size, sycl::access::decorated::yes>(); }
Loading

0 comments on commit 4ff8fcf

Please sign in to comment.