Skip to content

Commit a105055

Browse files
authored
[SYCL][SCLA] Add CodeGen capabilities for sycl_ext_oneapi_private_alloca (#12894)
The [`sycl_ext_oneapi_private_alloca`](https://github.com/intel/llvm/blob/56e9067ba69809fb6ea1fd4328456ca3a009f984/sycl/doc/extensions/experimental/sycl_ext_oneapi_private_alloca.asciidoc) extension adds new functions returning a pointer to a SYCL private memory allocation whose length is given by a specialization constant. This commit adds codegen support for these functions. The `sycl::private_alloca` function is implemented as an alias to a new `__builtin_intel_sycl_alloca` builtin. This is needed to guarantee the call will lower to just an `alloca` instruction defining the private memory allocation. This builtin lowers to a SYCL builtin call to `__builtin_sycl_unique_stable_id` and a call to a new `llvm.sycl.alloca` intrinsic. This intrinsic receives three arguments encoding the specialization constant used as array length, a type hint argument encoding the allocation element type, and an argument encoding the required alignment. Note the `sycl_ext_oneapi_private_alloca` extension defines two functions: `private_alloca` and `aligned_private_alloca`. This commit adds codegen support only for the first signature, but already prepares support for the aligned flavor by adding an argument encoding the memory allocation alignment to the `llvm.sycl.alloca` intrinsic. The intrinsic is needed because generating an `alloca` instruction right away would lead to optimization passes converting the size argument, which can be of any integral type, thus making it difficult to lower to a single SPIR-V `OpVariable` later in the pipeline. --------- Signed-off-by: Victor Perez <victor.perez@codeplay.com>
1 parent 03f61fc commit a105055

20 files changed

+737
-30
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,12 +1450,12 @@ def SYCLType: InheritableAttr {
14501450
"specialization_id", "kernel_handler", "buffer_location",
14511451
"no_alias", "accessor_property_list", "group",
14521452
"private_memory", "aspect", "annotated_ptr", "annotated_arg",
1453-
"stream", "sampler", "host_pipe"],
1453+
"stream", "sampler", "host_pipe", "multi_ptr"],
14541454
["accessor", "local_accessor",
14551455
"specialization_id", "kernel_handler", "buffer_location",
14561456
"no_alias", "accessor_property_list", "group",
14571457
"private_memory", "aspect", "annotated_ptr", "annotated_arg",
1458-
"stream", "sampler", "host_pipe"]>];
1458+
"stream", "sampler", "host_pipe", "multi_ptr"]>];
14591459
// Only used internally by SYCL implementation
14601460
let Documentation = [InternalOnly];
14611461
}

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4635,6 +4635,12 @@ def IntelSYCLPtrAnnotation : Builtin {
46354635
let Prototype = "void(...)";
46364636
}
46374637

4638+
def IntelSYCLAlloca : Builtin {
4639+
let Spellings = ["__builtin_intel_sycl_alloca"];
4640+
let Attributes = [NoThrow, CustomTypeChecking];
4641+
let Prototype = "void *(void &)";
4642+
}
4643+
46384644
// Builtins for Intel FPGA
46394645
def IntelSYCLFPGAReg : Builtin {
46404646
let Spellings = ["__builtin_intel_fpga_reg"];

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,28 @@ def err_intel_sycl_ptr_annotation_mismatch
177177
"a pointer"
178178
"|a string literal or constexpr const char*}0">;
179179

180+
def err_intel_sycl_alloca_no_alias
181+
: Error<"__builtin_intel_sycl_alloca cannot be used in source code. "
182+
"Use the private_alloca alias instead">;
183+
def err_intel_sycl_alloca_wrong_arg_count
184+
: Error<"__builtin_intel_sycl_alloca expects to be passed a single "
185+
"argument. Got %0">;
186+
def err_intel_sycl_alloca_wrong_template_arg_count
187+
: Error<"__builtin_intel_sycl_alloca expects to be passed three template "
188+
"arguments. Got %0">;
189+
def err_intel_sycl_alloca_wrong_arg
190+
: Error<"__builtin_intel_sycl_alloca expects to be passed an argument of type "
191+
"'sycl::kernel_handler &'. Got %0">;
192+
def err_intel_sycl_alloca_wrong_type
193+
: Error<"__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' "
194+
"to a cv-unqualified object type. Got %0">;
195+
def err_intel_sycl_alloca_wrong_size
196+
: Error<"__builtin_intel_sycl_alloca must be passed a specialization "
197+
"constant of integral value type as a template argument. Got %1 (%0)">;
198+
def err_intel_sycl_alloca_no_size
199+
: Error<"__builtin_intel_sycl_alloca must be passed a specialization "
200+
"constant of integral value type as a template argument. Got %0">;
201+
180202
// C99 variable-length arrays
181203
def ext_vla : Extension<"variable length arrays are a C99 feature">,
182204
InGroup<VLAExtension>;
@@ -4470,7 +4492,7 @@ def err_attribute_preferred_name_arg_invalid : Error<
44704492
"argument %0 to 'preferred_name' attribute is not a typedef for "
44714493
"a specialization of %1">;
44724494
def err_attribute_builtin_alias : Error<
4473-
"%0 attribute can only be applied to a ARM, HLSL or RISC-V builtin">;
4495+
"%0 attribute can only be applied to a ARM, HLSL, SYCL or RISC-V builtin">;
44744496

44754497
// called-once attribute diagnostics.
44764498
def err_called_once_attribute_wrong_type : Error<

clang/include/clang/Sema/Sema.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15193,6 +15193,9 @@ class Sema final {
1519315193

1519415194
bool CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID,
1519515195
CallExpr *Call);
15196+
bool CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID,
15197+
CallExpr *Call);
15198+
1519615199
private:
1519715200
// We store SYCL Kernels here and handle separately -- which is a hack.
1519815201
// FIXME: It would be best to refactor this.
@@ -15336,6 +15339,9 @@ class Sema final {
1533615339
VDecl->hasGlobalStorage() &&
1533715340
(VDecl->getType().getAddressSpace() == LangAS::sycl_private);
1533815341
}
15342+
15343+
/// Check whether \p Ty corresponds to a SYCL type of name \p TypeName.
15344+
static bool isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName);
1533915345
};
1534015346

1534115347
DeductionFailureInfo

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5923,6 +5923,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
59235923
return EmitIntelFPGAMemBuiltin(E);
59245924
case Builtin::BI__builtin_intel_sycl_ptr_annotation:
59255925
return EmitIntelSYCLPtrAnnotationBuiltin(E);
5926+
case Builtin::BI__builtin_intel_sycl_alloca:
5927+
return EmitIntelSYCLAllocaBuiltin(E, ReturnValue);
59265928
case Builtin::BI__builtin_get_device_side_mangled_name: {
59275929
auto Name = CGM.getCUDARuntime().getDeviceSideName(
59285930
cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
@@ -23655,6 +23657,84 @@ RValue CodeGenFunction::EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E) {
2365523657
return RValue::get(Ann);
2365623658
}
2365723659

23660+
RValue
23661+
CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
23662+
ReturnValueSlot ReturnValue) {
23663+
const FunctionDecl *FD = E->getDirectCallee();
23664+
assert(FD && "Expecting direct call to builtin");
23665+
23666+
SourceLocation Loc = E->getExprLoc();
23667+
23668+
// Get specialization constant ID.
23669+
ValueDecl *SpecConst =
23670+
FD->getTemplateSpecializationArgs()->get(1).getAsDecl();
23671+
DeclRefExpr *Ref = DeclRefExpr::Create(
23672+
getContext(), NestedNameSpecifierLoc(), SourceLocation(), SpecConst,
23673+
/*RefersToEnclosingVariableOrCapture=*/false, E->getExprLoc(),
23674+
SpecConst->getType(), ExprValueKind::VK_LValue);
23675+
llvm::Value *UID = EmitScalarExpr(
23676+
SYCLUniqueStableIdExpr::Create(getContext(), Loc, Loc, Loc, Ref));
23677+
23678+
// Get specialization ID pointer.
23679+
llvm::Value *SpecConstPtr =
23680+
EmitLValue(Ref, clang::CodeGen::KnownNonNull).getPointer(*this);
23681+
23682+
// Get specialization constant buffer.
23683+
// TODO: When this extension supports more targets, get RTBufferPtr from input
23684+
// sycl::kernel_handler &.
23685+
llvm::Value *RTBufferPtr = llvm::ConstantPointerNull::get(
23686+
cast<llvm::PointerType>(SpecConstPtr->getType()));
23687+
23688+
// Get allocation type.
23689+
const TemplateArgumentList &TAL =
23690+
cast<ClassTemplateSpecializationDecl>(E->getType()->getAsCXXRecordDecl())
23691+
->getTemplateArgs();
23692+
QualType AllocaType = TAL.get(0).getAsType();
23693+
llvm::Type *Ty = CGM.getTypes().ConvertTypeForMem(AllocaType);
23694+
unsigned AllocaAS = CGM.getDataLayout().getAllocaAddrSpace();
23695+
llvm::Type *AllocaTy = llvm::PointerType::get(Builder.getContext(), AllocaAS);
23696+
23697+
llvm::Constant *EltTyConst = llvm::Constant::getNullValue(Ty);
23698+
23699+
llvm::Constant *Align = Builder.getInt64(
23700+
getContext().getTypeAlignInChars(AllocaType).getAsAlign().value());
23701+
23702+
llvm::Value *Allocation = [&]() {
23703+
// To implement automatic storage duration of the underlying memory object,
23704+
// insert intrinsic call before `AllocaInsertPt`. These will be lowered to
23705+
// an `alloca` or an equivalent construct in later compilation stages.
23706+
IRBuilderBase::InsertPointGuard IPG(Builder);
23707+
Builder.SetInsertPoint(AllocaInsertPt);
23708+
return Builder.CreateIntrinsic(
23709+
AllocaTy, Intrinsic::sycl_alloca,
23710+
{UID, SpecConstPtr, RTBufferPtr, EltTyConst, Align}, nullptr, "alloca");
23711+
}();
23712+
23713+
// Perform AS cast if needed.
23714+
23715+
constexpr int NoDecorated = 0;
23716+
llvm::APInt Decorated = TAL.get(2).getAsIntegral();
23717+
// Both 'sycl::access::decorated::{yes and legacy}' lead to decorated (private
23718+
// AS) pointer type. Perform cast if 'sycl::access::decorated::no'.
23719+
if (Decorated == NoDecorated) {
23720+
IRBuilderBase::InsertPointGuard IPG(Builder);
23721+
Builder.SetInsertPoint(getPostAllocaInsertPoint());
23722+
unsigned DestAddrSpace =
23723+
getContext().getTargetAddressSpace(LangAS::Default);
23724+
llvm::PointerType *DestTy =
23725+
llvm::PointerType::get(Builder.getContext(), DestAddrSpace);
23726+
Allocation = Builder.CreateAddrSpaceCast(Allocation, DestTy);
23727+
}
23728+
23729+
// If no slot is provided, simply return allocation.
23730+
if (ReturnValue.isNull())
23731+
return RValue::get(Allocation);
23732+
23733+
// If a slot is provided, store pointer there.
23734+
Builder.CreateStore(Allocation, ReturnValue.getValue());
23735+
return RValue::getAggregate(ReturnValue.getValue());
23736+
}
23737+
2365823738
Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
2365923739
const CallExpr *E,
2366023740
ReturnValueSlot ReturnValue) {

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4446,6 +4446,8 @@ class CodeGenFunction : public CodeGenTypeCache {
44464446
RValue EmitIntelFPGAMemBuiltin(const CallExpr *E);
44474447

44484448
RValue EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E);
4449+
RValue EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
4450+
ReturnValueSlot ReturnValue);
44494451

44504452
llvm::CallInst *
44514453
MaybeEmitFPBuiltinofFD(llvm::FunctionType *IRFuncTy,

clang/lib/Sema/SemaChecking.cpp

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2773,6 +2773,16 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
27732773
if (CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(BuiltinID, TheCall))
27742774
return ExprError();
27752775
break;
2776+
case Builtin::BI__builtin_intel_sycl_alloca:
2777+
if (!Context.getLangOpts().SYCLIsDevice) {
2778+
Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language)
2779+
<< "__builtin_intel_sycl_alloca"
2780+
<< "SYCL device";
2781+
return ExprError();
2782+
}
2783+
if (CheckIntelSYCLAllocaBuiltinFunctionCall(BuiltinID, TheCall))
2784+
return ExprError();
2785+
break;
27762786
case Builtin::BI__builtin_intel_fpga_mem:
27772787
if (!Context.getLangOpts().SYCLIsDevice) {
27782788
Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language)
@@ -7487,6 +7497,97 @@ bool Sema::CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID,
74877497
return false;
74887498
}
74897499

7500+
bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) {
7501+
assert(getLangOpts().SYCLIsDevice &&
7502+
"Builtin can only be used in SYCL device code");
7503+
7504+
SourceLocation Loc = Call->getBeginLoc();
7505+
7506+
// This builtin cannot be called directly. As it needs to pass template
7507+
// arguments, this is always an alias.
7508+
const FunctionDecl *FD = Call->getDirectCallee();
7509+
assert(FD && "Builtin cannot be called from a function pointer");
7510+
if (!FD->hasAttr<BuiltinAliasAttr>()) {
7511+
Diag(Loc, diag::err_intel_sycl_alloca_no_alias);
7512+
return true;
7513+
}
7514+
7515+
// Check a single argument is passed
7516+
if (checkArgCount(*this, Call, 1))
7517+
return true;
7518+
7519+
// Check three template arguments are passed
7520+
if (const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs();
7521+
!TAL || TAL->size() != 3) {
7522+
Diag(Loc, diag::err_intel_sycl_alloca_wrong_template_arg_count)
7523+
<< (TAL ? TAL->size() : 0);
7524+
return true;
7525+
}
7526+
7527+
// Check the single argument is of type `sycl::kernel_handler &`
7528+
constexpr auto CheckArg = [](QualType Ty) {
7529+
if (!Ty->isLValueReferenceType())
7530+
return true;
7531+
Ty = Ty->getPointeeType();
7532+
return !(Ty.getQualifiers().empty() &&
7533+
isSyclType(Ty, SYCLTypeAttr::kernel_handler));
7534+
};
7535+
if (CheckArg(FD->getParamDecl(0)->getType())) {
7536+
Diag(Loc, diag::err_intel_sycl_alloca_wrong_arg)
7537+
<< FD->getParamDecl(0)->getType();
7538+
return true;
7539+
}
7540+
7541+
// Check the return type is `sycl::multi_ptr<ET,
7542+
// sycl::access::address_space::private_space, DecoratedAddress>`:
7543+
// - `ET`: non-const, non-volatile, non-void, non-function, non-reference type
7544+
constexpr auto CheckType = [](QualType RT) {
7545+
if (!isSyclType(RT, SYCLTypeAttr::multi_ptr))
7546+
return true;
7547+
// Check element type
7548+
const TemplateArgumentList &TAL =
7549+
cast<ClassTemplateSpecializationDecl>(RT->getAsRecordDecl())
7550+
->getTemplateArgs();
7551+
QualType ET = TAL.get(0).getAsType();
7552+
if (ET.isConstQualified() || ET.isVolatileQualified() || ET->isVoidType() ||
7553+
ET->isFunctionType() || ET->isReferenceType())
7554+
return true;
7555+
constexpr uint64_t PrivateAS = 0;
7556+
return TAL.get(1).getAsIntegral() != PrivateAS;
7557+
};
7558+
if (CheckType(FD->getReturnType())) {
7559+
Diag(Loc, diag::err_intel_sycl_alloca_wrong_type) << FD->getReturnType();
7560+
return true;
7561+
}
7562+
7563+
// Check size is passed as a specialization constant
7564+
constexpr auto CheckSize = [](const ASTContext &Ctx,
7565+
const TemplateArgumentList *CST) {
7566+
QualType Ty = CST->get(1).getNonTypeTemplateArgumentType();
7567+
if (Ty.isNull() || !Ty->isReferenceType())
7568+
return true;
7569+
Ty = Ty->getPointeeType();
7570+
if (!isSyclType(Ty, SYCLTypeAttr::specialization_id))
7571+
return true;
7572+
const TemplateArgumentList &TAL =
7573+
cast<ClassTemplateSpecializationDecl>(Ty->getAsCXXRecordDecl())
7574+
->getTemplateArgs();
7575+
return !TAL.get(0).getAsType()->isIntegralType(Ctx);
7576+
};
7577+
const TemplateArgumentList *CST = FD->getTemplateSpecializationArgs();
7578+
if (CheckSize(getASTContext(), CST)) {
7579+
TemplateArgument TA = CST->get(1);
7580+
QualType Ty = TA.getNonTypeTemplateArgumentType();
7581+
if (Ty.isNull())
7582+
Diag(Loc, diag::err_intel_sycl_alloca_no_size) << TA;
7583+
else
7584+
Diag(Loc, diag::err_intel_sycl_alloca_wrong_size) << TA << Ty;
7585+
return true;
7586+
}
7587+
7588+
return false;
7589+
}
7590+
74907591
/// Given a FunctionDecl's FormatAttr, attempts to populate the FomatStringInfo
74917592
/// parameter with the FormatAttr's correct format_idx and firstDataArg.
74927593
/// Returns true when the format fits the function and the FormatStringInfo has

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8859,6 +8859,11 @@ static bool RISCVAliasValid(unsigned BuiltinID, StringRef AliasName) {
88598859
BuiltinID <= RISCV::LastRVVBuiltin;
88608860
}
88618861

8862+
static bool SYCLAliasValid(ASTContext &Context, unsigned BuiltinID) {
8863+
constexpr llvm::StringLiteral Prefix = "__builtin_intel_sycl";
8864+
return Context.BuiltinInfo.getName(BuiltinID).starts_with(Prefix);
8865+
}
8866+
88628867
static void handleBuiltinAliasAttr(Sema &S, Decl *D,
88638868
const ParsedAttr &AL) {
88648869
if (!AL.isArgIdent(0)) {
@@ -8875,11 +8880,13 @@ static void handleBuiltinAliasAttr(Sema &S, Decl *D,
88758880
bool IsARM = S.Context.getTargetInfo().getTriple().isARM();
88768881
bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV();
88778882
bool IsHLSL = S.Context.getLangOpts().HLSL;
8883+
bool IsSYCL = S.Context.getLangOpts().isSYCL();
88788884
if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
88798885
(IsARM && !ArmMveAliasValid(BuiltinID, AliasName) &&
88808886
!ArmCdeAliasValid(BuiltinID, AliasName)) ||
88818887
(IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) ||
8882-
(!IsAArch64 && !IsARM && !IsRISCV && !IsHLSL)) {
8888+
(IsSYCL && !SYCLAliasValid(S.Context, BuiltinID)) ||
8889+
(!IsAArch64 && !IsARM && !IsRISCV && !IsHLSL && !IsSYCL)) {
88838890
S.Diag(AL.getLoc(), diag::err_attribute_builtin_alias) << AL;
88848891
return;
88858892
}

0 commit comments

Comments
 (0)