diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index cbc03a363233a..73b0e74a221c7 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2646,50 +2646,14 @@ def SYCLIntelEnableLoopPipelining : StmtAttr { def : MutualExclusions<[SYCLIntelDisableLoopPipelining, SYCLIntelEnableLoopPipelining]>; -def SYCLIntelLocalNonConstVar : SubsetSubjecthasLocalStorage() && - S->getKind() != Decl::ImplicitParam && - S->getKind() != Decl::ParmVar && - S->getKind() != Decl::NonTypeTemplateParm && - !S->getType().isConstQualified()}], - "local non-const variables">; - -def SYCLIntelConstVar : SubsetSubjectgetKind() != Decl::ImplicitParam && - S->getKind() != Decl::ParmVar && - S->getKind() != Decl::NonTypeTemplateParm && - (S->getType().isConstQualified() || - S->getType().getAddressSpace() == - LangAS::opencl_constant)}], - "constant variables">; - -def SYCLIntelLocalStaticAgentMemVar : SubsetSubjectgetKind() != Decl::ImplicitParam && - S->getKind() != Decl::NonTypeTemplateParm && - (S->getStorageClass() == SC_Static || - S->hasLocalStorage())}], - "local variables, static variables, agent memory arguments">; - -def SYCLIntelLocalOrStaticVar : SubsetSubjectgetKind() != Decl::ImplicitParam && - S->getKind() != Decl::ParmVar && - S->getKind() != Decl::NonTypeTemplateParm && - (S->getStorageClass() == SC_Static || - S->hasLocalStorage())}], - "local variables, static variables">; - def SYCLIntelDoublePump : Attr { let Spellings = [CXX11<"intel", "doublepump">]; - let Subjects = SubjectList<[SYCLIntelConstVar, SYCLIntelLocalOrStaticVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelDoublePumpAttrDocs]; } def SYCLIntelSinglePump : Attr { let Spellings = [CXX11<"intel", "singlepump">]; - let Subjects = SubjectList<[SYCLIntelConstVar, SYCLIntelLocalOrStaticVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelSinglePumpAttrDocs]; } @@ -2708,17 +2672,12 @@ def SYCLIntelMemory : Attr { } } }]; - let Subjects = SubjectList<[SYCLIntelConstVar, - SYCLIntelLocalStaticAgentMemVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelMemoryAttrDocs]; } def SYCLIntelRegister : Attr { let Spellings = [CXX11<"intel", "fpga_register">]; - let Subjects = SubjectList<[SYCLIntelConstVar, SYCLIntelLocalOrStaticVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelRegisterAttrDocs]; } @@ -2729,9 +2688,6 @@ def : MutualExclusions<[SYCLIntelDoublePump, SYCLIntelSinglePump, def SYCLIntelBankWidth : InheritableAttr { let Spellings = [CXX11<"intel", "bankwidth">]; let Args = [ExprArgument<"Value">]; - let Subjects = SubjectList<[SYCLIntelConstVar, - SYCLIntelLocalStaticAgentMemVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelBankWidthAttrDocs]; } @@ -2740,9 +2696,6 @@ def : MutualExclusions<[SYCLIntelRegister, SYCLIntelBankWidth]>; def SYCLIntelNumBanks : InheritableAttr { let Spellings = [CXX11<"intel", "numbanks">]; let Args = [ExprArgument<"Value">]; - let Subjects = SubjectList<[SYCLIntelConstVar, - SYCLIntelLocalStaticAgentMemVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelNumBanksAttrDocs]; } @@ -2751,7 +2704,6 @@ def SYCLIntelPrivateCopies : InheritableAttr { let Spellings = [CXX11<"intel", "private_copies">]; let Args = [ExprArgument<"Value">]; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; - let Subjects = SubjectList<[SYCLIntelLocalNonConstVar, Field], ErrorDiag>; let Documentation = [SYCLIntelPrivateCopiesAttrDocs]; } def : MutualExclusions<[SYCLIntelRegister, SYCLIntelPrivateCopies]>; @@ -2760,8 +2712,6 @@ def : MutualExclusions<[SYCLIntelRegister, SYCLIntelPrivateCopies]>; def SYCLIntelMerge : Attr { let Spellings = [CXX11<"intel", "merge">]; let Args = [StringArgument<"Name">, StringArgument<"Direction">]; - let Subjects = SubjectList<[SYCLIntelConstVar, SYCLIntelLocalOrStaticVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelMergeAttrDocs]; } @@ -2770,9 +2720,6 @@ def : MutualExclusions<[SYCLIntelRegister, SYCLIntelMerge]>; def SYCLIntelMaxReplicates : InheritableAttr { let Spellings = [CXX11<"intel", "max_replicates">]; let Args = [ExprArgument<"Value">]; - let Subjects = SubjectList<[SYCLIntelConstVar, - SYCLIntelLocalStaticAgentMemVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelMaxReplicatesAttrDocs]; } @@ -2780,9 +2727,6 @@ def : MutualExclusions<[SYCLIntelRegister, SYCLIntelMaxReplicates]>; def SYCLIntelSimpleDualPort : Attr { let Spellings = [CXX11<"intel", "simple_dual_port">]; - let Subjects = SubjectList<[SYCLIntelConstVar, - SYCLIntelLocalStaticAgentMemVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelSimpleDualPortAttrDocs]; } @@ -2807,9 +2751,6 @@ def SYCLIntelPipeIO : InheritableAttr { def SYCLIntelBankBits : Attr { let Spellings = [CXX11<"intel", "bank_bits">]; let Args = [VariadicExprArgument<"Args">]; - let Subjects = SubjectList<[SYCLIntelConstVar, - SYCLIntelLocalStaticAgentMemVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelBankBitsDocs]; } @@ -2819,9 +2760,6 @@ def : MutualExclusions<[SYCLIntelRegister, SYCLIntelNumBanks]>; def SYCLIntelForcePow2Depth : InheritableAttr { let Spellings = [CXX11<"intel", "force_pow2_depth">]; let Args = [ExprArgument<"Value">]; - let Subjects = SubjectList<[SYCLIntelConstVar, - SYCLIntelLocalStaticAgentMemVar, - Field], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLIntelForcePow2DepthAttrDocs]; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index af29649364bd2..54feb5d797ed9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12218,6 +12218,13 @@ def err_sycl_attribute_internal_decl "in an anonymous namespace">; def err_sycl_attribute_not_device_global : Error<"%0 attribute can only be applied to 'device_global' variables">; +def err_fpga_attribute_incorrect_variable + : Error<"%0 attribute only applies to constant variables, local variables, " + "static variables, %select{|agent memory arguments, }1non-static data " + "members and device_global variables">; +def err_fpga_attribute_invalid_decl + : Error<"%0 attribute only applies to const variables, local variables, " + "non-static data members and device_global variables">; def err_sycl_compiletime_property_duplication : Error< "can't apply %0 property twice to the same accessor">; def err_sycl_invalid_property_list_param_number : Error< diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index a9e60fe347126..03a88b9e89cf1 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -371,6 +371,8 @@ static llvm::SmallVector SYCLDeviceLibList { "bfloat16", "crt", "cmath", "cmath-fp64", "complex", "complex-fp64", #if defined(_WIN32) "msvc-math", +#else + "sanitizer", #endif "imf", "imf-fp64", "itt-compiler-wrappers", "itt-stubs", "itt-user-wrappers", "fallback-cassert", "fallback-cstring", diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 68949acfcd5fb..b11f223f1ce5d 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7443,6 +7443,24 @@ static bool checkForDuplicateAttribute(Sema &S, Decl *D, return false; } +// Checks if FPGA memory attributes apply on valid variables. +// Returns true if an error occured. +static bool CheckValidFPGAMemoryAttributesVar(Sema &S, Decl *D) { + if (const auto *VD = dyn_cast(D)) { + if (!(isa(D) || + (VD->getKind() != Decl::ImplicitParam && + VD->getKind() != Decl::NonTypeTemplateParm && + (S.isTypeDecoratedWithDeclAttribute( + VD->getType()) || + VD->getType().isConstQualified() || + VD->getType().getAddressSpace() == LangAS::opencl_constant || + VD->getStorageClass() == SC_Static || VD->hasLocalStorage())))) { + return true; + } + } + return false; +} + void Sema::AddSYCLIntelNoGlobalWorkOffsetAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E) { @@ -7521,6 +7539,15 @@ static void handleSYCLIntelSinglePumpAttr(Sema &S, Decl *D, } } + // Check attribute applies to field, constant variables, local variables, + // static variables, non-static data members, and device_global variables. + if ((D->getKind() == Decl::ParmVar) || + CheckValidFPGAMemoryAttributesVar(S, D)) { + S.Diag(AL.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << AL << /*agent memory arguments*/ 0; + return; + } + // If the declaration does not have an [[intel::fpga_memory]] // attribute, this creates one as an implicit attribute. if (!D->hasAttr()) @@ -7544,6 +7571,15 @@ static void handleSYCLIntelDoublePumpAttr(Sema &S, Decl *D, } } + // Check attribute applies to field, constant variables, local variables, + // static variables, non-static data members, and device_global variables. + if ((D->getKind() == Decl::ParmVar) || + CheckValidFPGAMemoryAttributesVar(S, D)) { + S.Diag(AL.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << AL << /*agent memory arguments*/ 0; + return; + } + // If the declaration does not have an [[intel::fpga_memory]] // attribute, this creates one as an implicit attribute. if (!D->hasAttr()) @@ -7591,6 +7627,15 @@ static void handleSYCLIntelMemoryAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->dropAttr(); } + // Check attribute applies to field, constant variables, local variables, + // static variables, agent memory arguments, non-static data members, + // and device_global variables. + if (CheckValidFPGAMemoryAttributesVar(S, D)) { + S.Diag(AL.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << AL << /*agent memory arguments*/ 1; + return; + } + D->addAttr(::new (S.Context) SYCLIntelMemoryAttr(S.Context, AL, Kind)); } @@ -7623,6 +7668,15 @@ static void handleSYCLIntelRegisterAttr(Sema &S, Decl *D, } } + // Check attribute applies to field, constant variables, local variables, + // static variables, non-static data members, and device_global variables. + if ((D->getKind() == Decl::ParmVar) || + CheckValidFPGAMemoryAttributesVar(S, D)) { + S.Diag(A.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << A << /*agent memory arguments*/ 0; + return; + } + if (checkIntelFPGARegisterAttrCompatibility(S, D, A)) return; @@ -7661,6 +7715,15 @@ void Sema::AddSYCLIntelBankWidthAttr(Decl *D, const AttributeCommonInfo &CI, return; } + // Check attribute applies to field, constant variables, local variables, + // static variables, agent memory arguments, non-static data members, + // and device_global variables. + if (CheckValidFPGAMemoryAttributesVar(*this, D)) { + Diag(CI.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << CI << /*agent memory arguments*/ 1; + return; + } + // Check to see if there's a duplicate attribute with different values // already applied to the declaration. if (const auto *DeclAttr = D->getAttr()) { @@ -7745,6 +7808,15 @@ void Sema::AddSYCLIntelNumBanksAttr(Decl *D, const AttributeCommonInfo &CI, } } + // Check attribute applies to constant variables, local variables, + // static variables, agent memory arguments, non-static data members, + // and device_global variables. + if (CheckValidFPGAMemoryAttributesVar(*this, D)) { + Diag(CI.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << CI << /*agent memory arguments*/ 1; + return; + } + // Check to see if there's a duplicate attribute with different values // already applied to the declaration. if (const auto *DeclAttr = D->getAttr()) { @@ -7812,6 +7884,15 @@ static void handleIntelSimpleDualPortAttr(Sema &S, Decl *D, } } + // Check attribute applies to field, constant variables, local variables, + // static variables, agent memory arguments, non-static data members, + // and device_global variables. + if (CheckValidFPGAMemoryAttributesVar(S, D)) { + S.Diag(AL.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << AL << /*agent memory arguments*/ 1; + return; + } + if (!D->hasAttr()) D->addAttr(SYCLIntelMemoryAttr::CreateImplicit( S.Context, SYCLIntelMemoryAttr::Default)); @@ -7837,6 +7918,16 @@ void Sema::AddSYCLIntelMaxReplicatesAttr(Decl *D, const AttributeCommonInfo &CI, << CI << /*positive*/ 0; return; } + + // Check attribute applies to field, constant variables, local variables, + // static variables, agent memory arguments, non-static data members, + // and device_global variables. + if (CheckValidFPGAMemoryAttributesVar(*this, D)) { + Diag(CI.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << CI << /*agent memory arguments*/ 1; + return; + } + // Check to see if there's a duplicate attribute with different values // already applied to the declaration. if (const auto *DeclAttr = D->getAttr()) { @@ -7920,6 +8011,15 @@ static void handleSYCLIntelMergeAttr(Sema &S, Decl *D, const ParsedAttr &AL) { return; } + // Check attribute applies to field, constant variables, local variables, + // static variables, non-static data members, and device_global variables. + if ((D->getKind() == Decl::ParmVar) || + CheckValidFPGAMemoryAttributesVar(S, D)) { + S.Diag(AL.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << AL << /*agent memory arguments*/ 0; + return; + } + if (!D->hasAttr()) D->addAttr(SYCLIntelMemoryAttr::CreateImplicit( S.Context, SYCLIntelMemoryAttr::Default)); @@ -8005,6 +8105,15 @@ void Sema::AddSYCLIntelBankBitsAttr(Decl *D, const AttributeCommonInfo &CI, D->addAttr(SYCLIntelNumBanksAttr::CreateImplicit(Context, NBE)); } + // Check attribute applies to field, constant variables, local variables, + // static variables, agent memory arguments, non-static data members, + // and device_global variables. + if (CheckValidFPGAMemoryAttributesVar(*this, D)) { + Diag(CI.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << CI << /*agent memory arguments*/ 1; + return; + } + if (!D->hasAttr()) D->addAttr(SYCLIntelMemoryAttr::CreateImplicit( Context, SYCLIntelMemoryAttr::Default)); @@ -8030,6 +8139,22 @@ void Sema::AddSYCLIntelPrivateCopiesAttr(Decl *D, const AttributeCommonInfo &CI, << CI << /*non-negative*/ 1; return; } + + // Check attribute applies to field as well as const variables, non-static + // local variables, non-static data members, and device_global variables. + if (const auto *VD = dyn_cast(D)) { + if (!(isa(D) || + (VD->getKind() != Decl::ImplicitParam && + VD->getKind() != Decl::NonTypeTemplateParm && + VD->getKind() != Decl::ParmVar && + (VD->hasLocalStorage() || + isTypeDecoratedWithDeclAttribute( + VD->getType()))))) { + Diag(CI.getLoc(), diag::err_fpga_attribute_invalid_decl) << CI; + return; + } + } + // Check to see if there's a duplicate attribute with different values // already applied to the declaration. if (const auto *DeclAttr = D->getAttr()) { @@ -8080,6 +8205,15 @@ void Sema::AddSYCLIntelForcePow2DepthAttr(Decl *D, return; } + // Check attribute applies to field, constant variables, local variables, + // static variables, agent memory arguments, non-static data members, + // and device_global variables. + if (CheckValidFPGAMemoryAttributesVar(*this, D)) { + Diag(CI.getLoc(), diag::err_fpga_attribute_incorrect_variable) + << CI << /*agent memory arguments*/ 1; + return; + } + // Check to see if there's a duplicate attribute with different values // already applied to the declaration. if (const auto *DeclAttr = D->getAttr()) { diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index d0f8ddbe7193c..929adf5398d84 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -726,6 +726,25 @@ class DeviceFunctionTracker { } }; +/// This function checks whether given DeclContext contains a topmost +/// namespace with name "sycl". +static bool isDeclaredInSYCLNamespace(const Decl *D) { + const DeclContext *DC = D->getDeclContext()->getEnclosingNamespaceContext(); + const auto *ND = dyn_cast(DC); + // If this is not a namespace, then we are done. + if (!ND) + return false; + + // While it is a namespace, find its parent scope. + while (const DeclContext *Parent = ND->getParent()) { + if (!isa(Parent)) + break; + ND = cast(Parent); + } + + return ND && ND->getName() == "sycl"; +} + // This type does the heavy lifting for the management of device functions, // recursive function detection, and attribute collection for a single // kernel/external function. It walks the callgraph to find all functions that @@ -770,6 +789,20 @@ class SingleDeviceFunctionTracker { Parent.SemaRef.addFDToReachableFromSyclDevice(CurrentDecl, CallStack.back()); + // If this is a parallel_for_work_item that is declared in the + // sycl namespace, mark it with the WorkItem scope attribute. + // Note: Here, we assume that this is called from within a + // parallel_for_work_group; it is undefined to call it otherwise. + // We deliberately do not diagnose a violation. + if (CurrentDecl->getIdentifier() && + CurrentDecl->getIdentifier()->getName() == "parallel_for_work_item" && + isDeclaredInSYCLNamespace(CurrentDecl) && + !CurrentDecl->hasAttr()) { + CurrentDecl->addAttr( + SYCLScopeAttr::CreateImplicit(Parent.SemaRef.getASTContext(), + SYCLScopeAttr::Level::WorkItem)); + } + // We previously thought we could skip this function if we'd seen it before, // but if we haven't seen it before in this call graph, we can end up // missing a recursive call. SO, we have to revisit call-graphs we've @@ -919,14 +952,13 @@ class MarkWIScopeFnVisitor : public RecursiveASTVisitor { // not a member of sycl::group - continue search return true; auto Name = Callee->getName(); - if (((Name != "parallel_for_work_item") && (Name != "wait_for")) || + if (Name != "wait_for" || Callee->hasAttr()) return true; - // it is a call to sycl::group::parallel_for_work_item/wait_for - - // mark the callee + // it is a call to sycl::group::wait_for - mark the callee Callee->addAttr( SYCLScopeAttr::CreateImplicit(Ctx, SYCLScopeAttr::Level::WorkItem)); - // continue search as there can be other PFWI or wait_for calls + // continue search as there can be other wait_for calls return true; } @@ -2968,7 +3000,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { assert(CallOperator && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: - // NOTE: In case of parallel_for_work_item the marker call itself is + // NOTE: In case of wait_for the marker call itself is // marked with work item scope attribute, here the '()' operator of the // object passed as parameter is marked. This is an optimization - // there are a lot of locals created at parallel_for_work_group @@ -2979,7 +3011,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { if (!CallOperator->hasAttr()) { CallOperator->addAttr(SYCLScopeAttr::CreateImplicit( SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); - // Search and mark parallel_for_work_item calls: + // Search and mark wait_for calls: MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext()); MarkWIScope.TraverseDecl(CallOperator); // Now mark local variables declared in the PFWG lambda with work group diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index fc5abb02d75f3..4149cacad30c1 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -33,6 +33,10 @@ template class __SYCL_TYPE(group) group { public: group() = default; // fake constructor + // Dummy parallel_for_work_item function to mimic calls from + // parallel_for_work_group. + void parallel_for_work_item() { + } }; namespace access { diff --git a/clang/test/CodeGenSYCL/device_global.cpp b/clang/test/CodeGenSYCL/device_global.cpp index 88b3e8f607a96..6a828c63719cc 100644 --- a/clang/test/CodeGenSYCL/device_global.cpp +++ b/clang/test/CodeGenSYCL/device_global.cpp @@ -11,6 +11,20 @@ using namespace sycl; queue q; device_global A; + +[[intel::numbanks(2)]] device_global Nonconst_glob; +[[intel::max_replicates(2)]] device_global Nonconst_glob1; +[[intel::force_pow2_depth(1)]] device_global Nonconst_glob2; +[[intel::bankwidth(2)]] device_global Nonconst_glob3; +[[intel::simple_dual_port]] device_global Nonconst_glob4; +[[intel::fpga_memory]] device_global Nonconst_glob5; +[[intel::bank_bits(3, 4)]] device_global Nonconst_glob6; +[[intel::fpga_register]] device_global Nonconst_glob7; +[[intel::doublepump]] device_globalNonconst_glob8; +[[intel::singlepump]] device_global Nonconst_glob9; +[[intel::merge("mrg5", "width")]] device_global Nonconst_glob10; +[[intel::private_copies(8)]] device_global Nonconst_glob11; + #ifdef SYCL_EXTERNAL SYCL_EXTERNAL device_global AExt; #endif @@ -20,8 +34,21 @@ struct Foo { static device_global C; }; device_global Foo::C; + // CHECK-RDC: @AExt = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[AEXT_ATTRS:[0-9]+]] // CHECK: @A = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[A_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Num_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob1 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Max_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob2 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Force_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob3 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Bankw_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob4 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Simple_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob5 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Mem_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob6 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Bankbits_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob7 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Reg_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob8 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Dpump_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob9 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Spump_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob10 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Merge_ATTRS:[0-9]+]] +// CHECK: @Nonconst_glob11 = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[Non_Const_Pc_ATTRS:[0-9]+]] // CHECK: @_ZL1B = internal addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[B_ATTRS:[0-9]+]] // CHECK: @_ZN3Foo1CE = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[C_ATTRS:[0-9]+]] @@ -70,6 +97,18 @@ void foo() { q.submit([&](handler &h) { h.single_task([=]() { (void)A; + (void)Nonconst_glob; + (void)Nonconst_glob1; + (void)Nonconst_glob2; + (void)Nonconst_glob3; + (void)Nonconst_glob4; + (void)Nonconst_glob5; + (void)Nonconst_glob6; + (void)Nonconst_glob7; + (void)Nonconst_glob8; + (void)Nonconst_glob9; + (void)Nonconst_glob10; + (void)Nonconst_glob11; (void)B; (void)Foo::C; (void)same_name; @@ -103,6 +142,18 @@ void bar() { // CHECK-RDC: attributes #[[AEXT_ATTRS]] = { "sycl-unique-id"="_Z4AExt" } // CHECK: attributes #[[A_ATTRS]] = { "sycl-unique-id"="_Z1A" } +// CHECK: attributes #[[Non_Const_Num_ATTRS]] = { "sycl-unique-id"="_Z13Nonconst_glob" } +// CHECK: attributes #[[Non_Const_Max_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob1" } +// CHECK: attributes #[[Non_Const_Force_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob2" } +// CHECK: attributes #[[Non_Const_Bankw_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob3" } +// CHECK: attributes #[[Non_Const_Simple_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob4" } +// CHECK: attributes #[[Non_Const_Mem_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob5" } +// CHECK: attributes #[[Non_Const_Bankbits_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob6" } +// CHECK: attributes #[[Non_Const_Reg_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob7" } +// CHECK: attributes #[[Non_Const_Dpump_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob8" } +// CHECK: attributes #[[Non_Const_Spump_ATTRS]] = { "sycl-unique-id"="_Z14Nonconst_glob9" } +// CHECK: attributes #[[Non_Const_Merge_ATTRS]] = { "sycl-unique-id"="_Z15Nonconst_glob10" } +// CHECK: attributes #[[Non_Const_Pc_ATTRS]] = { "sycl-unique-id"="_Z15Nonconst_glob11" } // CHECK: attributes #[[B_ATTRS]] = { "sycl-unique-id"="THE_PREFIX____ZL1B" } // CHECK: attributes #[[C_ATTRS]] = { "sycl-unique-id"="_ZN3Foo1CE" } // CHECK: attributes #[[SAME_NAME_ATTRS]] = { "sycl-unique-id"="_Z9same_name" } diff --git a/clang/test/CodeGenSYCL/sycl-pf-work-item.cpp b/clang/test/CodeGenSYCL/sycl-pf-work-item.cpp new file mode 100644 index 0000000000000..7aa8d250ea822 --- /dev/null +++ b/clang/test/CodeGenSYCL/sycl-pf-work-item.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -internal-isystem %S/Inputs -emit-llvm %s -o - | FileCheck %s +// This test checks if the parallel_for_work_item called indirecly from +// parallel_for_work_group gets the work_item_scope marker on it. +#include + +void foo(sycl::group<1> work_group) { + work_group.parallel_for_work_item(); +} + +int main(int argc, char **argv) { + sycl::queue q; + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for_work_group( + sycl::range<1>{1}, sycl::range<1>{1024}, ([=](sycl::group<1> wGroup) { + foo(wGroup); + })); + }); + return 0; +} + +// CHECK: define {{.*}} void @{{.*}}sycl{{.*}}group{{.*}}parallel_for_work_item{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) {{.*}}!work_item_scope {{.*}}!parallel_for_work_item diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index 325505bad4556..c4b95e9477c0c 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -36,6 +36,7 @@ // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-bf16.o" "-output={{.*}}libsycl-fallback-imf-bf16-{{.*}}.o" "-unbundle" + /// ########################################################################### /// test sycl fallback device libraries are not linked by default // RUN: %clangxx -fsycl -fsycl-device-lib-jit-link %s --sysroot=%S/Inputs/SYCL -### 2>&1 \ @@ -251,3 +252,4 @@ // SYCL_DEVICE_ASAN_MACRO-SAME: "USE_SYCL_DEVICE_ASAN" // SYCL_DEVICE_ASAN_MACRO-NEXT: llvm-link{{.*}} // SYCL_DEVICE_ASAN_MACRO: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-sanitizer.o" "-output={{.*}}libsycl-sanitizer-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_ASAN_MACRO: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" diff --git a/clang/test/SemaSYCL/intel-fpga-device-global.cpp b/clang/test/SemaSYCL/intel-fpga-device-global.cpp new file mode 100644 index 0000000000000..86b31a2efa429 --- /dev/null +++ b/clang/test/SemaSYCL/intel-fpga-device-global.cpp @@ -0,0 +1,434 @@ +// RUN: %clang_cc1 -fsycl-is-device -std=c++17 -sycl-std=2020 -verify %s +#include "Inputs/sycl.hpp" + +// Tests that [[intel::numbanks()]], [[intel::fpga_register]], [[intel::private_copies()]], [[intel::doublepump]], [[intel::singlepump]], [[intel::merge()]], [[intel::fpga_memory()]], [[intel::bank_bits()]], [[intel::force_pow2_depth()]], [[intel::max_replicates()]], [[intel::bankwidth()]], [[intel::simple_dual_port]] can be applied to device_global variables as well as constant variables, local variables, static variables, agent memory arguments, non-static data members. + +using namespace sycl::ext::oneapi; + +[[intel::numbanks(4)]] static device_global static_dev_glob; +[[intel::max_replicates(12)]] static device_global static_dev_glob1; +[[intel::force_pow2_depth(1)]] static device_global static_dev_glob2; +[[intel::bankwidth(4)]] static device_global static_dev_glob3; +[[intel::simple_dual_port]] static device_global static_dev_glob4; +[[intel::fpga_memory]] static device_global static_dev_glob5; +[[intel::bank_bits(3, 4)]] static device_global static_dev_glob6; +[[intel::fpga_register]] static device_global static_dev_glob7; +[[intel::doublepump]] static device_global static_dev_glob8; +[[intel::singlepump]] static device_global static_dev_glob9; +[[intel::merge("mrg5", "width")]] static device_global static_dev_glob10; + +// expected-error@+1{{'numbanks' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::numbanks(2)]] int K; + +// expected-error@+1{{'max_replicates' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::max_replicates(10)]] int K1; + +// expected-error@+1{{'force_pow2_depth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::force_pow2_depth(1)]] int K2; + +// expected-error@+1{{'bankwidth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bankwidth(8)]] int K3; + +// expected-error@+1{{'simple_dual_port' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::simple_dual_port]] int K4; + +// expected-error@+1{{'fpga_memory' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::fpga_memory]] int K5; + +// expected-error@+1{{'bank_bits' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bank_bits(3, 4)]] int K6; + +// expected-error@+1{{'fpga_register' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::fpga_register]] int K7; + +// expected-error@+1{{'doublepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::doublepump]] int K8; + +// expected-error@+1{{'singlepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::singlepump]] int K9; + +// expected-error@+1{{'merge' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::merge("mrg3", "width")]] int K10; + +//expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} +[[intel::private_copies(16)]] int K12; + +struct bar { + [[intel::numbanks(2)]] device_global nonconst_glob; + [[intel::numbanks(4)]] const device_global const_glob; + [[intel::numbanks(8)]] unsigned int numbanks[64]; + + [[intel::max_replicates(2)]] device_global nonconst_glob1; + [[intel::max_replicates(4)]] const device_global const_glob1; + [[intel::max_replicates(8)]] unsigned int max_rep[64]; + + [[intel::force_pow2_depth(0)]] device_global nonconst_glob2; + [[intel::force_pow2_depth(0)]] const device_global const_glob2; + [[intel::force_pow2_depth(1)]] unsigned int force_dep[64]; + + [[intel::bankwidth(2)]] device_global nonconst_glob3; + [[intel::bankwidth(4)]] const device_global const_glob3; + [[intel::bankwidth(16)]] unsigned int bankw[64]; + + [[intel::simple_dual_port]] device_global nonconst_glob4; + [[intel::simple_dual_port]] const device_global const_glob4; + [[intel::simple_dual_port]] unsigned int simple[64]; + + [[intel::fpga_memory]] device_global nonconst_glob5; + [[intel::fpga_memory("MLAB")]] const device_global const_glob5; + [[intel::fpga_memory("BLOCK_RAM")]] unsigned int mem_block_ram[32]; + + [[intel::bank_bits(3, 4)]] device_global nonconst_glob6; + [[intel::bank_bits(4, 5)]] const device_global const_glob6; + [[intel::bank_bits(3, 4)]] unsigned int mem_block_bits[32]; + + [[intel::fpga_register]] device_global nonconst_glob7; + [[intel::fpga_register]] const device_global const_glob7; + [[intel::fpga_register]] unsigned int reg; + + [[intel::singlepump]] device_global nonconst_glob8; + [[intel::singlepump]] const device_global const_glob8; + [[intel::singlepump]] unsigned int spump; + + [[intel::doublepump]] device_global nonconst_glob9; + [[intel::doublepump]] const device_global const_glob9; + [[intel::doublepump]] unsigned int dpump; + + [[intel::merge("mrg6", "depth")]] device_global nonconst_glob10; + [[intel::merge("mrg6", "depth")]] const device_global const_glob10; + [[intel::merge("mrg6", "width")]] unsigned int mergewidth; + + [[intel::private_copies(32)]] device_global nonconst_glob11; + [[intel::private_copies(8)]] const device_global const_glob11; + [[intel::private_copies(8)]] unsigned int pc; +}; + +struct RandomStruct { + int M; +}; + +// expected-error@+1{{'numbanks' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::numbanks(4)]] RandomStruct S; +// expected-error@+1{{'bankwidth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bankwidth(4)]] RandomStruct S1; +// expected-error@+1{{'force_pow2_depth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::force_pow2_depth(1)]] RandomStruct S2; +// expected-error@+1{{'max_replicates' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::max_replicates(8)]] RandomStruct S3; +// expected-error@+1{{'simple_dual_port' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::simple_dual_port]] RandomStruct S4; + +// expected-error@+1{{'fpga_memory' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::fpga_memory]] RandomStruct S5; + +// expected-error@+1{{'bank_bits' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bank_bits(4, 5)]] RandomStruct S6; + +// expected-error@+1{{'fpga_register' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::fpga_register]] RandomStruct S7; + +// expected-error@+1{{'singlepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::singlepump]] RandomStruct S8; + +// expected-error@+1{{'doublepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::doublepump]] RandomStruct S9; + +// expected-error@+1{{'merge' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::merge("mrg1", "width")]] RandomStruct S10; + +//expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} +[[intel::private_copies(32)]] RandomStruct S11; + +void foo() { + [[intel::numbanks(2)]] int A; + [[intel::numbanks(4)]] static unsigned int ext_five[64]; + [[intel::numbanks(8)]] RandomStruct S; + + [[intel::max_replicates(2)]] int A1; + [[intel::max_replicates(4)]] static unsigned int ext_five1[64]; + [[intel::max_replicates(24)]] RandomStruct S1; + + [[intel::force_pow2_depth(0)]] int A2; + [[intel::force_pow2_depth(1)]] static unsigned int ext_five2[64]; + [[intel::force_pow2_depth(0)]] RandomStruct S2; + + [[intel::bankwidth(2)]] int A3; + [[intel::bankwidth(4)]] static unsigned int ext_five3[64]; + [[intel::bankwidth(8)]] RandomStruct S3; + + [[intel::simple_dual_port]] int A4; + [[intel::simple_dual_port]] static unsigned int ext_five4[64]; + [[intel::simple_dual_port]] RandomStruct S4; + + [[intel::fpga_memory("BLOCK_RAM")]] int A5; + [[intel::fpga_memory("MLAB")]] static unsigned int ext_five5[64]; + [[intel::fpga_memory]] RandomStruct S5; + + [[intel::bank_bits(6, 7)]] int A6; + [[intel::bank_bits(9, 10)]] static unsigned int ext_five6[64]; + [[intel::bank_bits(4, 5)]] RandomStruct S6; + + [[intel::fpga_register]] int A7; + [[intel::fpga_register]] static unsigned int ext_five7[64]; + [[intel::fpga_register]] RandomStruct S7; + + [[intel::singlepump]] int A8; + [[intel::singlepump]] static unsigned int ext_five8[64]; + [[intel::singlepump]] RandomStruct S8; + + [[intel::doublepump]] int A9; + [[intel::doublepump]] static unsigned int ext_five9[64]; + [[intel::doublepump]] RandomStruct S9; + + [[intel::merge("mrg1", "depth")]] int A10; + [[intel::merge("mrg1", "width")]] static unsigned int ext_five10[64]; + [[intel::merge("mrg1", "width")]] RandomStruct S10; + + [[intel::private_copies(8)]] int A11; + //expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} + [[intel::private_copies(16)]] static unsigned int ext_five11[64]; + [[intel::private_copies(32)]] RandomStruct S11; +} + +void attr_on_const_no_error() +{ + [[intel::numbanks(16)]] const int const_var[64] = {0, 1}; + [[intel::max_replicates(16)]] const int const_var_max[64] = {0, 1}; + [[intel::force_pow2_depth(1)]] const int const_var_force[64] = {0, 1}; + [[intel::bankwidth(16)]] const int const_var_bankw[64] = {0, 1}; + [[intel::simple_dual_port]] const int const_var_simple_dual[64] = {0, 1}; + [[intel::fpga_memory]] const int const_var_mem[64] = {0, 1}; + [[intel::bank_bits(6, 7)]] const int const_var_bits[64] = {0, 1}; + [[intel::fpga_register]] const int const_var_regis[64] = {0, 1}; + [[intel::singlepump]] const int const_var_spump[64] = {0, 1}; + [[intel::doublepump]] const int const_var_dpump[64] = {0, 1}; + [[intel::merge("mrg6", "width")]] const int const_var_mergewid[64] = {0, 1}; +} + +void attr_on_func_arg([[intel::numbanks(8)]] int pc) {} +void attr_on_func_arg1([[intel::max_replicates(8)]] int pc1) {} +void attr_on_func_arg2([[intel::force_pow2_depth(1)]] int pc2) {} +void attr_on_func_arg3([[intel::bankwidth(8)]] int pc3) {} +void attr_on_func_arg4([[intel::simple_dual_port]] int pc4) {} +void attr_on_func_arg5([[intel::fpga_memory]] int pc5) {} +void attr_on_func_arg6([[intel::bank_bits(7, 8)]] int pc6) {} +// expected-error@+1{{'singlepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +void attr_on_func_arg7([[intel::singlepump]] int pc7) {} +// expected-error@+1{{'doublepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +void attr_on_func_arg8([[intel::doublepump]] int pc8) {} +// expected-error@+1{{'fpga_register' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +void attr_on_func_arg9([[intel::fpga_register]] int pc9) {} +// expected-error@+1{{'merge' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +void attr_on_func_arg10([[intel::merge("mrg1", "width")]] int pc10) {} + +struct [[__sycl_detail__::global_variable_allowed]] GlobAllowedVarOnly { +}; + +// expected-error@+1{{'numbanks' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::numbanks(2)]] GlobAllowedVarOnly GAVO; + +// expected-error@+1{{'max_replicates' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::max_replicates(20)]] GlobAllowedVarOnly GAVO1; + +// expected-error@+1{{'force_pow2_depth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::force_pow2_depth(0)]] GlobAllowedVarOnly GAVO2; + +// expected-error@+1{{'bankwidth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bankwidth(16)]] GlobAllowedVarOnly GAVO3; + +// expected-error@+1{{'simple_dual_port' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::simple_dual_port]] GlobAllowedVarOnly GAVO4; + +// expected-error@+1{{'fpga_memory' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::fpga_memory]] GlobAllowedVarOnly GAVO5; + +// expected-error@+1{{'bank_bits' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bank_bits(6, 7)]] GlobAllowedVarOnly GAVO6; + +// expected-error@+1{{'fpga_register' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::fpga_register]] GlobAllowedVarOnly GAVO7; + +// expected-error@+1{{'singlepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::singlepump]] GlobAllowedVarOnly GAVO8; + +// expected-error@+1{{'doublepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::doublepump]] GlobAllowedVarOnly GAVO9; + +// expected-error@+1{{'merge' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::merge("mrg5", "width")]] GlobAllowedVarOnly GAVO10; + +//expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} +[[intel::private_copies(16)]] GlobAllowedVarOnly GAVO11; + +[[intel::numbanks(4)]] device_global Good; +[[intel::numbanks(4)]] extern device_global Bad; + +[[intel::max_replicates(8)]] device_global Good1; +[[intel::max_replicates(10)]] extern device_global Bad1; + +[[intel::force_pow2_depth(0)]] device_global Good2; +[[intel::force_pow2_depth(0)]] extern device_global Bad2; + +[[intel::bankwidth(2)]] device_global Good3; +[[intel::bankwidth(2)]] extern device_global Bad3; + +[[intel::simple_dual_port]] device_global Good4; +[[intel::simple_dual_port]] extern device_global Bad4; + +[[intel::fpga_memory("MLAB")]] device_global Good5; +[[intel::fpga_memory("BLOCK_RAM")]] extern device_global Bad5; + +[[intel::bank_bits(6, 7)]] device_global Good6; +[[intel::bank_bits(7, 8)]] extern device_global Bad6; + +[[intel::fpga_register]] device_global Good7; +[[intel::fpga_register]] extern device_global Bad7; + +[[intel::doublepump]] device_global Good8; +[[intel::doublepump]] extern device_global Bad8; + +[[intel::singlepump]] device_global Good9; +[[intel::singlepump]] extern device_global Bad9; + +[[intel::merge("mrg1", "depth")]] device_global Good10; +[[intel::merge("mrg1", "depth")]] extern device_global Bad10; + +[[intel::private_copies(16)]] device_global Good11; +[[intel::private_copies(16)]] extern device_global Bad11; + +int main() { + sycl::kernel_single_task([=]() { + Good.get(); + Good1.get(); + Good2.get(); + Good3.get(); + Good4.get(); + Good5.get(); + Good6.get(); + Good7.get(); + Good8.get(); + Good9.get(); + Good10.get(); + Good11.get(); + + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad1.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad2.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad3.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad4.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad5.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad6.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad7.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad8.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad9.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad10.get(); + // expected-error@+1 {{invalid reference to 'device_global' variable; external 'device_global' variable must be marked with SYCL_EXTERNAL macro}} + Bad11.get(); + + (void)GAVO; + (void)GAVO1; + (void)GAVO2; + (void)GAVO3; + (void)GAVO4; + (void)GAVO5; + (void)GAVO6; + (void)GAVO7; + (void)GAVO8; + (void)GAVO9; + (void)GAVO10; + (void)GAVO11; + }); + return 0; +} + +//expected-error@+1{{'numbanks' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::numbanks(2)]] +__attribute__((opencl_global)) unsigned int ocl_glob_num_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'max_replicates' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::max_replicates(20)]] +__attribute__((opencl_global)) unsigned int ocl_glob_max_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'bankwidth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bankwidth(32)]] +__attribute__((opencl_global)) unsigned int ocl_glob_bankw_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'simple_dual_port' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::simple_dual_port]] +__attribute__((opencl_global)) unsigned int ocl_glob_simple_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'fpga_memory' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::fpga_memory("MLAB")]] +__attribute__((opencl_global)) unsigned int ocl_glob_memory_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'bank_bits' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} +[[intel::bank_bits(7, 8)]] +__attribute__((opencl_global)) unsigned int ocl_glob_bank_bits_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'fpga_register' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::fpga_register]] +__attribute__((opencl_global)) unsigned int ocl_glob_reg_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'doublepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::doublepump]] +__attribute__((opencl_global)) unsigned int ocl_glob_dpump_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'singlepump' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::singlepump]] +__attribute__((opencl_global)) unsigned int ocl_glob_spump_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'merge' attribute only applies to constant variables, local variables, static variables, non-static data members and device_global variables}} +[[intel::merge("mrg4", "depth")]] +__attribute__((opencl_global)) unsigned int ocl_glob_mer_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} +[[intel::private_copies(8)]] +__attribute__((opencl_global)) unsigned int ocl_glob_pc_p2d[64] = {1, 2, 3}; + +//expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} +[[intel::private_copies(8)]] +__attribute__((opencl_constant)) unsigned int const_var_private_copies[64] = {1, 2, 3}; + +[[intel::merge("mrg5", "width")]] +__attribute__((opencl_constant)) unsigned int const_var_merge[64] = {1, 2, 3}; + +[[intel::fpga_register]] +__attribute__((opencl_constant)) unsigned int const_var_fpga_register[64] = {1, 2, 3}; + +[[intel::fpga_memory]] +__attribute__((opencl_constant)) unsigned int const_var_fpga_memory[64] = {1, 2, 3}; + +[[intel::bank_bits(2, 3)]] +__attribute__((opencl_constant)) unsigned int const_var_bank_bits[64] = {1, 2, 3}; + +[[intel::numbanks(8)]] +__attribute__((opencl_constant)) unsigned int const_var[64] = {1, 2, 3}; + +[[intel::max_replicates(16)]] +__attribute__((opencl_constant)) unsigned int const_var_max_rep[64] = {1, 2, 3}; + +[[intel::force_pow2_depth(0)]] +__attribute__((opencl_constant)) unsigned int const_force_var[64] = {1, 2, 3}; + +[[intel::bankwidth(32)]] +__attribute__((opencl_constant)) unsigned int const_bankw_var[64] = {1, 2, 3}; + +[[intel::simple_dual_port]] +__attribute__((opencl_constant)) unsigned int const_simple_var[64] = {1, 2, 3}; + +[[intel::doublepump]] +__attribute__((opencl_constant)) unsigned int const_dpump_var[64] = {1, 2, 3}; + +[[intel::singlepump]] +__attribute__((opencl_constant)) unsigned int const_spump_var[64] = {1, 2, 3}; diff --git a/clang/test/SemaSYCL/intel-fpga-local.cpp b/clang/test/SemaSYCL/intel-fpga-local.cpp index d6b48db59bd9e..b171c5078d9f2 100644 --- a/clang/test/SemaSYCL/intel-fpga-local.cpp +++ b/clang/test/SemaSYCL/intel-fpga-local.cpp @@ -442,20 +442,20 @@ void check_gnu_style() { int __attribute__((force_pow2_depth(0))) force_p2d; } -//expected-error@+1{{attribute only applies to local non-const variables and non-static data members}} +//expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} [[intel::private_copies(8)]] __attribute__((opencl_constant)) unsigned int const_var[64] = {1, 2, 3}; void attr_on_const_error() { - //expected-error@+1{{attribute only applies to local non-const variables and non-static data members}} + //expected-no-error@+1 [[intel::private_copies(8)]] const int const_var[64] = {0, 1}; } -//expected-error@+1{{attribute only applies to local non-const variables and non-static data members}} +//expected-error@+1{{'private_copies' attribute only applies to const variables, local variables, non-static data members and device_global variables}} void attr_on_func_arg([[intel::private_copies(8)]] int pc) {} -//expected-error@+1{{attribute only applies to constant variables, local variables, static variables, agent memory arguments, and non-static data members}} +//expected-error@+1{{'force_pow2_depth' attribute only applies to constant variables, local variables, static variables, agent memory arguments, non-static data members and device_global variables}} [[intel::force_pow2_depth(0)]] __attribute__((opencl_global)) unsigned int ocl_glob_force_p2d[64] = {1, 2, 3}; diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt index 077d617002e94..5556869affaa6 100644 --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -28,7 +28,6 @@ endif() add_clang_tool(clang-linker-wrapper ClangLinkerWrapper.cpp - SYCLOffloadWrapper.cpp DEPENDS ${tablegen_deps} diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 14cf4441d8a31..07f8a950d0daf 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -14,12 +14,12 @@ // //===---------------------------------------------------------------------===// -#include "SYCLOffloadWrapper.h" #include "clang/Basic/Version.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/Frontend/Offloading/OffloadWrapper.h" +#include "llvm/Frontend/Offloading/SYCLOffloadWrapper.h" #include "llvm/Frontend/Offloading/Utility.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticPrinter.h" @@ -741,8 +741,8 @@ readPropertyRegistryFromFile(StringRef File) { // a_n.bin|a_n.prop|a_n.sym // // .bin extension might be a bc, spv or other native extension. -Expected> readSYCLImagesFromTable(StringRef TableFile, - const ArgList &Args) { +Expected> +readSYCLImagesFromTable(StringRef TableFile, const ArgList &Args) { auto TableOrErr = util::SimpleTable::read(TableFile); if (!TableOrErr) return TableOrErr.takeError(); @@ -756,7 +756,7 @@ Expected> readSYCLImagesFromTable(StringRef TableFile, inconvertibleErrorCode(), "expected columns in the table: Code, Properties and Symbols"); - SmallVector Images; + SmallVector Images; for (const util::SimpleTable::Row &row : Table->rows()) { auto ImageOrErr = readBinaryFile(row.getCell("Code")); if (!ImageOrErr) @@ -771,7 +771,7 @@ Expected> readSYCLImagesFromTable(StringRef TableFile, if (!SymbolsOrErr) return SymbolsOrErr.takeError(); - SYCLImage Image; + offloading::SYCLImage Image; Image.Image = std::move(*ImageOrErr); Image.PropertyRegistry = std::move(**PropertiesOrErr); Image.Entries = std::move(*SymbolsOrErr); @@ -811,7 +811,7 @@ Expected wrapSYCLBinariesFromFile(StringRef InputFile, inconvertibleErrorCode(), "can't wrap SYCL image. -triple argument is missed."); - for (SYCLImage &Image : Images) + for (offloading::SYCLImage &Image : Images) Image.Target = Target; LLVMContext C; @@ -822,10 +822,10 @@ Expected wrapSYCLBinariesFromFile(StringRef InputFile, StringRef CompileOptions = Args.getLastArgValue(OPT_sycl_backend_compile_options_EQ); StringRef LinkOptions = Args.getLastArgValue(OPT_sycl_target_link_options_EQ); - SYCLWrappingOptions WrappingOptions; + offloading::SYCLWrappingOptions WrappingOptions; WrappingOptions.CompileOptions = CompileOptions; WrappingOptions.LinkOptions = LinkOptions; - if (Error E = wrapSYCLBinaries(M, Images, WrappingOptions)) + if (Error E = offloading::wrapSYCLBinaries(M, Images, WrappingOptions)) return E; if (Args.hasArg(OPT_print_wrapped_module)) @@ -1796,6 +1796,7 @@ Expected> linkAndWrapDeviceFiles( // separate path inside 'linkDevice' call seen above. // This will eventually be refactored to use the 'common' wrapping logic // that is used for other offload kinds. + std::scoped_lock Guard(ImageMtx); WrappedOutput.push_back(*SYCLOutputOrErr); } diff --git a/libdevice/include/spir_global_var.hpp b/libdevice/include/spir_global_var.hpp index 4bf1cb8daefae..ed757480ae8d4 100644 --- a/libdevice/include/spir_global_var.hpp +++ b/libdevice/include/spir_global_var.hpp @@ -11,15 +11,6 @@ // Treat this header as system one to workaround frontend's restriction #pragma clang system_header -#ifndef SPIR_GLOBAL_VAR -#ifdef __SYCL_DEVICE_ONLY__ -#define SPIR_GLOBAL_VAR __attribute__((sycl_global_var)) -#else -#warning "SPIR_GLOBAL_VAR not defined in host mode. Defining as empty macro." -#define SPIR_GLOBAL_VAR -#endif -#endif - #define __SYCL_GLOBAL__ __attribute__((opencl_global)) #define __SYCL_LOCAL__ __attribute__((opencl_local)) #define __SYCL_PRIVATE__ __attribute__((opencl_private)) diff --git a/llvm/docs/requirements-hashed.txt b/llvm/docs/requirements-hashed.txt index 1f31ecb3b60b5..7452b6f721103 100644 --- a/llvm/docs/requirements-hashed.txt +++ b/llvm/docs/requirements-hashed.txt @@ -16,9 +16,9 @@ beautifulsoup4==4.12.3 \ --hash=sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051 \ --hash=sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed # via furo -certifi==2023.11.17 \ - --hash=sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1 \ - --hash=sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474 +certifi==2024.2.2 \ + --hash=sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f \ + --hash=sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1 # via requests charset-normalizer==3.3.2 \ --hash=sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027 \ @@ -124,9 +124,9 @@ docutils==0.20.1 \ # myst-parser # recommonmark # sphinx -furo==2023.9.10 \ - --hash=sha256:513092538537dc5c596691da06e3c370714ec99bc438680edc1debffb73e5bfc \ - --hash=sha256:5707530a476d2a63b8cad83b4f961f3739a69f4b058bcf38a03a39fa537195b2 +furo==2024.1.29 \ + --hash=sha256:3548be2cef45a32f8cdc0272d415fcb3e5fa6a0eb4ddfe21df3ecf1fe45a13cf \ + --hash=sha256:4d6b2fe3f10a6e36eb9cc24c1e7beb38d7a23fc7b3c382867503b7fcac8a1e02 # via -r requirements.txt idna==3.6 \ --hash=sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca \ @@ -152,67 +152,67 @@ markdown-it-py==3.0.0 \ # via # mdit-py-plugins # myst-parser -markupsafe==2.1.4 \ - --hash=sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69 \ - --hash=sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0 \ - --hash=sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d \ - --hash=sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec \ - --hash=sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5 \ - --hash=sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411 \ - --hash=sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3 \ - --hash=sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74 \ - --hash=sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0 \ - --hash=sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949 \ - --hash=sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d \ - --hash=sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279 \ - --hash=sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f \ - --hash=sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6 \ - --hash=sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc \ - --hash=sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e \ - --hash=sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954 \ - --hash=sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656 \ - --hash=sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc \ - --hash=sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518 \ - --hash=sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56 \ - --hash=sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc \ - --hash=sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa \ - --hash=sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565 \ - --hash=sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4 \ - --hash=sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb \ - --hash=sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250 \ - --hash=sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4 \ - --hash=sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959 \ - --hash=sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc \ - --hash=sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474 \ - --hash=sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863 \ - --hash=sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8 \ - --hash=sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f \ - --hash=sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2 \ - --hash=sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e \ - --hash=sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e \ - --hash=sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb \ - --hash=sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f \ - --hash=sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a \ - --hash=sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26 \ - --hash=sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d \ - --hash=sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2 \ - --hash=sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131 \ - --hash=sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789 \ - --hash=sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6 \ - --hash=sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a \ - --hash=sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858 \ - --hash=sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e \ - --hash=sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb \ - --hash=sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e \ - --hash=sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84 \ - --hash=sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7 \ - --hash=sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea \ - --hash=sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b \ - --hash=sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6 \ - --hash=sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475 \ - --hash=sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74 \ - --hash=sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a \ - --hash=sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00 +markupsafe==2.1.5 \ + --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ + --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ + --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ + --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ + --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ + --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ + --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ + --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ + --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ + --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ + --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ + --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ + --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ + --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ + --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ + --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ + --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ + --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ + --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ + --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ + --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ + --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ + --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ + --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ + --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ + --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ + --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ + --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ + --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ + --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ + --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ + --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ + --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ + --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ + --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ + --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ + --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ + --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ + --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ + --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ + --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ + --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ + --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ + --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ + --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ + --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ + --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ + --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ + --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ + --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ + --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ + --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ + --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ + --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ + --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ + --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ + --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ + --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ + --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ + --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 # via jinja2 mdit-py-plugins==0.4.0 \ --hash=sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9 \ @@ -318,9 +318,9 @@ sphinx==7.2.6 \ # sphinxcontrib-htmlhelp # sphinxcontrib-qthelp # sphinxcontrib-serializinghtml -sphinx-automodapi==0.16.0 \ - --hash=sha256:68fc47064804604b90aa27c047016e86aaf970981d90a0082d5b5dd2e9d38afd \ - --hash=sha256:6c673ef93066408e5ad3e2fa3533044d432a47fe6a826212b9ebf5f52a872554 +sphinx-automodapi==0.17.0 \ + --hash=sha256:4d029cb79eef29413e94ab01bb0177ebd2d5ba86e9789b73575afe9c06ae1501 \ + --hash=sha256:7ccdadad57add4aa9149d9f2bb5cf28c8f8b590280b4735b1156ea8355c423a1 # via -r requirements.txt sphinx-basic-ng==1.0.0b2 \ --hash=sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9 \ @@ -360,7 +360,7 @@ sphinxcontrib-serializinghtml==1.1.10 \ --hash=sha256:326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7 \ --hash=sha256:93f3f5dc458b91b192fe10c397e324f262cf163d79f3282c158e8436a2c4511f # via sphinx -urllib3==2.1.0 \ - --hash=sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3 \ - --hash=sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54 +urllib3==2.2.1 \ + --hash=sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d \ + --hash=sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19 # via requests diff --git a/llvm/docs/requirements.txt b/llvm/docs/requirements.txt index 697ce2e3adb6d..960a6eb644e14 100644 --- a/llvm/docs/requirements.txt +++ b/llvm/docs/requirements.txt @@ -2,8 +2,8 @@ sphinx==7.2.6 docutils==0.20.1 sphinx-markdown-tables==0.0.17 recommonmark==0.7.1 -sphinx-automodapi==0.16.0 +sphinx-automodapi==0.17.0 sphinx-bootstrap-theme==0.8.1 sphinxcontrib-applehelp==1.0.8 -furo==2023.9.10 +furo==2024.1.29 myst-parser==2.0.0 diff --git a/clang/tools/clang-linker-wrapper/SYCLOffloadWrapper.h b/llvm/include/llvm/Frontend/Offloading/SYCLOffloadWrapper.h similarity index 84% rename from clang/tools/clang-linker-wrapper/SYCLOffloadWrapper.h rename to llvm/include/llvm/Frontend/Offloading/SYCLOffloadWrapper.h index 53d820c4557a0..7bbcc794ccdbe 100644 --- a/clang/tools/clang-linker-wrapper/SYCLOffloadWrapper.h +++ b/llvm/include/llvm/Frontend/Offloading/SYCLOffloadWrapper.h @@ -18,6 +18,9 @@ #include #include +namespace llvm { +namespace offloading { + // SYCL binary image formats supported. enum class SYCLBinaryImageFormat { BIF_None, // Undetermined Image kind @@ -50,8 +53,14 @@ struct SYCLWrappingOptions { std::string LinkOptions; }; +/// Wraps the input bundled images and accompanied data into the module \p M +/// as global symbols and registers the images with the SYCL Runtime. +/// \param Options Settings that allows to turn on optional data and settings. llvm::Error wrapSYCLBinaries(llvm::Module &M, llvm::SmallVector &Images, SYCLWrappingOptions Options = SYCLWrappingOptions()); +} // namespace offloading +} // namespace llvm + #endif diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt index 16e0dcfa0e90d..96c6557bb1b59 100644 --- a/llvm/lib/Frontend/Offloading/CMakeLists.txt +++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt @@ -1,6 +1,8 @@ add_llvm_component_library(LLVMFrontendOffloading Utility.cpp OffloadWrapper.cpp + SYCLOffloadWrapper.cpp + ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend diff --git a/clang/tools/clang-linker-wrapper/SYCLOffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp similarity index 99% rename from clang/tools/clang-linker-wrapper/SYCLOffloadWrapper.cpp rename to llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp index 9d8dfca5cee8f..a68a4b18c9375 100644 --- a/clang/tools/clang-linker-wrapper/SYCLOffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp @@ -14,7 +14,7 @@ // sycl/include/sycl/detail/pi.h //===----------------------------------------------------------------------===// -#include "SYCLOffloadWrapper.h" +#include "llvm/Frontend/Offloading/SYCLOffloadWrapper.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -40,6 +40,7 @@ #include using namespace llvm; +using namespace llvm::offloading; using namespace llvm::util; namespace { @@ -727,8 +728,9 @@ struct Wrapper { } // anonymous namespace -Error wrapSYCLBinaries(llvm::Module &M, SmallVector &Images, - SYCLWrappingOptions Options) { +Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M, + SmallVector &Images, + SYCLWrappingOptions Options) { Wrapper W(M, Options); GlobalVariable *Desc = W.createFatbinDesc(Images); if (!Desc) diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 9cc66626a2e78..56bee306aa9cc 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -236,7 +236,7 @@ Implementation of UR command-buffers for each of the supported SYCL 2020 backends. Backends which are implemented currently are: [Level Zero](#level-zero), -[CUDA](#cuda), and partial support for [OpenCL](#opencl). +[CUDA](#cuda), [HIP](#hip) and partial support for [OpenCL](#opencl). ### Level Zero @@ -351,6 +351,27 @@ An executable CUDA Graph, which contains all commands and synchronization information, is saved in the UR command-buffer to allow for efficient graph resubmission. +### HIP + +The HIP backend offers a Graph managemenet API very similar to CUDA Graph +feature for batching series of operations. +The SYCL Graph HIP backend implementation is therefore very similar to that of CUDA. + +UR commands (e.g. kernels) are mapped as graph nodes using the +[HIP Management API](https://docs.amd.com/projects/HIP/en/docs-5.5.0/doxygen/html/group___graph.html). +Synchronization between commands (UR sync-points) is implemented +using graph dependencies. +Executable HIP Graphs can be submitted to a HIP stream +in the same way as regular kernels. +The HIP backend enables enqueuing events to wait for into a stream. +It also allows signaling the completion of a submission with an event. +Therefore, submitting a UR command-buffer consists only of submitting to a stream +the executable HIP Graph that represent this series of operations. + +An executable HIP Graph, which contains all commands and synchronization +information, is saved in the UR command-buffer to allow for efficient +graph resubmission. + ### OpenCL SYCL-Graph is only enabled for an OpenCL backend when the diff --git a/sycl/doc/design/images/SYCL-Graph-Architecture.svg b/sycl/doc/design/images/SYCL-Graph-Architecture.svg index c554391f6544d..ffd3035495071 100644 --- a/sycl/doc/design/images/SYCL-Graph-Architecture.svg +++ b/sycl/doc/design/images/SYCL-Graph-Architecture.svg @@ -1,4 +1,4 @@ -
Application
Application
SYCL-Graph Extension API
SYCL-Graph Extension API
SYCL Runtime
SYCL Runtime
Unified Runtime + Command Buffer Extension
Unified Runtime + Command Buffer Extension
CUDA
CUDA
NVIDIA GPU
NVIDIA GPU
Level Zero
Level Zero
Intel CPU, GPU, FPGA, ...
Intel CPU, GPU,...
HIP
HIP
OpenCL cl_khr_command_buffer
OpenCL cl_khr_command_b...
AMD GPU
AMD GPU
CPU, GPU, FPGA, ...
CPU, GPU, FPG...
SYCL-Graph Architecture
SYCL-Graph Architecture


Application Layer
Application Layer
Implemented Backend
Implemented Backend
SYCL Runtime
SYCL Runtime
Future Backend Support
Future Backend Support
Legend
Legend
Partial Backend Support
Partial Backend Support
Text is not SVG - cannot display
\ No newline at end of file +
Application
Application
SYCL-Graph Extension API
SYCL-Graph Extension API
SYCL Runtime
SYCL Runtime
Unified Runtime + Command Buffer Extension
Unified Runtime + Command Buffer Extension
CUDA
CUDA
NVIDIA GPU
NVIDIA GPU
Level Zero
Level Zero
Intel CPU, GPU, FPGA, ...
Intel CPU, GPU,...
HIP
HIP
OpenCL cl_khr_command_buffer
OpenCL cl_khr_command_b...
AMD GPU
AMD GPU
CPU, GPU, FPGA, ...
CPU, GPU, FPG...
SYCL-Graph Architecture
SYCL-Graph Architecture


Application Layer
Application Layer
Implemented Backend
Implemented Backend
SYCL Runtime
SYCL Runtime
Legend
Legend
Partial Backend Support
Partial Backend Support
Text is not SVG - cannot display
\ No newline at end of file diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 1ad10bfbf2d1c..f2a4532b2c81c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -296,25 +296,46 @@ Adding an executable graph as a sub-graph does not affect its existing node dependencies, such that it could be submitted in future without any side effects of prior uses as a sub-graph. -=== API Modifications +=== Device Info Query [source, c++] ---- -namespace sycl { -namespace ext::oneapi::experimental { - -// State of a queue, returned by queue::ext_oneapi_get_state() -enum class queue_state { - executing, - recording -}; - +namespace sycl::ext::oneapi::experimental { enum class graph_support_level { unsupported, native, emulated }; +} +---- + +Due to the experimental nature of the extension, support is not available across +all devices. The following device support query is added to the +`sycl::ext::oneapi::experimental` namespace for reporting devices which are +are currently supported, and how that support is implemented. + +Table {counter: tableNumber}. Device Info Queries. +[%header] +|=== +| Device Descriptors | Return Type | Description + +|`info::device::graph_support` +|`graph_support_level` +|When passed to `device::get_info<...>()`, the function returns `native` +if there is an underlying SYCL backend command-buffer construct which is used +to propagate the graph to the backend. If no backend construct exists, or +building on top of it has not yet been implemented, then `emulated` is +returned. Otherwise `unsupported` is returned if the SYCL device doesn't +support using this graph extension. + +|=== + +=== Node + +[source, c++] +---- +namespace sycl::ext::oneapi::experimental { enum class node_type { empty, subgraph, @@ -328,46 +349,6 @@ enum class node_type { host_task, }; -namespace property { - -namespace graph { - -class no_cycle_check { - public: - no_cycle_check() = default; -}; - -class assume_buffer_outlives_graph { - public: - assume_buffer_outlives_graph() = default; -}; -} // namespace graph - -namespace node { - -class depends_on { - public: - template - depends_on(NodeTN... nodes); -}; - -class depends_on_all_leaves { - public: - depends_on_all_leaves() = default; -}; - -} // namespace node -} // namespace property - -// Device query for level of support -namespace info { -namespace device { - -struct graphs_support; - -} // namespace device -} // namespace info - class node { public: node() = delete; @@ -381,110 +362,9 @@ public: static node get_node_from_event(event nodeEvent); }; -// State of a graph -enum class graph_state { - modifiable, - executable -}; - -// New object representing graph -template -class command_graph {}; - -template<> -class command_graph { -public: - command_graph(const context& syclContext, const device& syclDevice, - const property_list& propList = {}); - - command_graph(const queue& syclQueue, - const property_list& propList = {}); - - command_graph - finalize(const property_list& propList = {}) const; - - bool begin_recording(queue& recordingQueue, const property_list& propList = {}); - bool begin_recording(const std::vector& recordingQueues, const property_list& propList = {}); - - bool end_recording(); - bool end_recording(queue& recordingQueue); - bool end_recording(const std::vector& recordingQueues); - - node add(const property_list& propList = {}); - - template - node add(T cgf, const property_list& propList = {}); - - void make_edge(node& src, node& dest); - - void print_graph(std::string path, bool verbose = false) const; - - std::vector get_nodes() const; - std::vector get_root_nodes() const; -}; - -template<> -class command_graph { -public: - command_graph() = delete; -}; -} // namespace ext::oneapi::experimental - -// New methods added to the sycl::queue class -using namespace ext::oneapi::experimental; -class queue { -public: - - ext::oneapi::experimental::queue_state - ext_oneapi_get_state() const; - - ext::oneapi::experimental::command_graph - ext_oneapi_get_graph() const; - - /* -- graph convenience shortcuts -- */ - - event ext_oneapi_graph(command_graph& graph); - event ext_oneapi_graph(command_graph& graph, - event depEvent); - event ext_oneapi_graph(command_graph& graph, - const std::vector& depEvents); -}; - -// New methods added to the sycl::handler class -class handler { -public: - void ext_oneapi_graph(command_graph& graph); -} - -} // namespace sycl +} // sycl::namespace ext::oneapi::experimental ---- -=== Device Info Query - -Due to the experimental nature of the extension, support is not available across -all devices. The following device support query is added to the -`sycl::ext::oneapi::experimental` namespace for reporting devices which are -are currently supported, and how that support is implemented. - -Table {counter: tableNumber}. Device Info Queries. -[%header] -|=== -| Device Descriptors | Return Type | Description - -|`info::device::graph_support` -|`graph_support_level` -|When passed to `device::get_info<...>()`, the function returns `native` -if there is an underlying SYCL backend command-buffer construct which is used -to propagate the graph to the backend. If no backend construct exists, or -building on top of it has not yet been implemented, then `emulated` is -returned. Otherwise `unsupported` is returned if the SYCL device doesn't -support using this graph extension. - -|=== - - -=== Node - :crs: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:reference-semantics Node is a class that encapsulates tasks like SYCL kernel functions, or memory @@ -547,13 +427,6 @@ Exceptions: ==== Depends-On Property -The API for explicitly adding nodes to a `command_graph` includes a -`property_list` parameter. This extension defines the `depends_on` property to -be passed here. `depends_on` defines any `node` objects for the created node to -be dependent on, and therefore form an edge with. These nodes are in addition to -the dependent nodes identified from the command-group requisites of the created -node. - [source,c++] ---- namespace sycl::ext::oneapi::experimental::property::node { @@ -565,23 +438,85 @@ class depends_on { } ---- -==== Depends-On-All-Leaves Property The API for explicitly adding nodes to a `command_graph` includes a -`property_list` parameter. This extension defines the `depends_on_all_leaves` -property to be passed here. `depends_on_all_leaves` provides a shortcut for -adding all the current leaves of a graph as dependencies. +`property_list` parameter. This extension defines the `depends_on` property to +be passed here. `depends_on` defines any `node` objects for the created node to +be dependent on, and therefore form an edge with. These nodes are in addition to +the dependent nodes identified from the command-group requisites of the created +node. + +==== Depends-On-All-Leaves Property [source,c++] ---- namespace sycl::ext::oneapi::experimental::property::node { class depends_on_all_leaves { public: - depends_on_all_leaves(); + depends_on_all_leaves() = default; }; } ---- +The API for explicitly adding nodes to a `command_graph` includes a +`property_list` parameter. This extension defines the `depends_on_all_leaves` +property to be passed here. `depends_on_all_leaves` provides a shortcut for +adding all the current leaves of a graph as dependencies. + === Graph +[source, c++] +---- +namespace sycl::ext::oneapi::experimental { +// State of a graph +enum class graph_state { + modifiable, + executable +}; + +// New object representing graph +template +class command_graph {}; + +template<> +class command_graph { +public: + command_graph(const context& syclContext, const device& syclDevice, + const property_list& propList = {}); + + command_graph(const queue& syclQueue, + const property_list& propList = {}); + + command_graph + finalize(const property_list& propList = {}) const; + + bool begin_recording(queue& recordingQueue, const property_list& propList = {}); + bool begin_recording(const std::vector& recordingQueues, const property_list& propList = {}); + + bool end_recording(); + bool end_recording(queue& recordingQueue); + bool end_recording(const std::vector& recordingQueues); + + node add(const property_list& propList = {}); + + template + node add(T cgf, const property_list& propList = {}); + + void make_edge(node& src, node& dest); + + void print_graph(std::string path, bool verbose = false) const; + + std::vector get_nodes() const; + std::vector get_root_nodes() const; +}; + +template<> +class command_graph { +public: + command_graph() = delete; +}; + +} // namespace sycl::ext::oneapi::experimental +---- + This extension adds a new `command_graph` object which follows the {crs}[common reference semantics] of other SYCL runtime objects. @@ -626,6 +561,16 @@ graph LR ===== No-Cycle-Check Property +[source,c++] +---- +namespace sycl::ext::oneapi::experimental::property::graph { +class no_cycle_check { + public: + no_cycle_check() = default; +}; +} +---- + The `property::graph::no_cycle_check` property disables any checks if a newly added dependency will lead to a cycle in a specific `command_graph` and can be passed to a `command_graph` on construction via the property list parameter. @@ -638,6 +583,16 @@ behavior. ===== Assume-Buffer-Outlives-Graph Property [[assume-buffer-outlives-graph-property]] +[source,c++] +---- +namespace sycl::ext::oneapi::experimental::property::graph { +class assume_buffer_outlives_graph { + public: + assume_buffer_outlives_graph() = default; +}; +} +---- + The `property::graph::assume_buffer_outlives_graph` property disables <> in a `command_graph` and can be passed to a `command_graph` on construction via the property list @@ -664,7 +619,7 @@ command_graph(const context& syclContext, `syclContext` and device `syclDevice`. Zero or more properties can be provided to the constructed SYCL `command_graph` via an instance of `property_list`. -Preconditions: +Constraints: * This constructor is only available when the `command_graph` state is `graph_state::modifiable`. @@ -699,7 +654,7 @@ command_graph(const queue& syclQueue, Zero or more properties can be provided to the constructed SYCL `command_graph` via an instance of `property_list`. -Preconditions: +Constraints: * This constructor is only available when the `command_graph` state is `graph_state::modifiable`. @@ -734,7 +689,7 @@ node add(const property_list& propList = {}); to make a connection point inside a graph between groups of nodes, and can significantly reduce the number of edges ( O(n) vs. O(n^2^) ). -Preconditions: +Constraints: * This member function is only available when the `command_graph` state is `graph_state::modifiable`. @@ -769,7 +724,7 @@ into the graph and executed asynchronously when the graph is submitted to a queue. The requisites of `cgf` will be used to identify any dependent nodes in the graph to form edges with. -Preconditions: +Constraints: * This member function is only available when the `command_graph` state is `graph_state::modifiable`. @@ -803,7 +758,7 @@ void make_edge(node& src, node& dest); |Creates a dependency between two nodes representing a happens-before relationship. -Preconditions: +Constraints: * This member function is only available when the `command_graph` state is `graph_state::modifiable`. @@ -842,7 +797,7 @@ to create subsequent executable graphs. It is also valid to continue to add new nodes to the modifiable graph instance after calling this function. It is valid to finalize an empty graph instance with no recorded commands. -Preconditions: +Constraints: * This member function is only available when the `command_graph` state is `graph_state::modifiable`. @@ -1024,6 +979,39 @@ Exceptions: === Queue Class Modifications +[source, c++] +---- +namespace sycl { +namespace ext::oneapi::experimental { +enum class queue_state { + executing, + recording +}; + +} // namespace ext::oneapi::experimental + +// New methods added to the sycl::queue class +using namespace ext::oneapi::experimental; +class queue { +public: + + ext::oneapi::experimental::queue_state + ext_oneapi_get_state() const; + + ext::oneapi::experimental::command_graph + ext_oneapi_get_graph() const; + + /* -- graph convenience shortcuts -- */ + + event ext_oneapi_graph(command_graph& graph); + event ext_oneapi_graph(command_graph& graph, + event depEvent); + event ext_oneapi_graph(command_graph& graph, + const std::vector& depEvents); +}; +} // namespace sycl +---- + :queue-class: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:interface.queue.class This extension modifies the {queue-class}[SYCL queue class] such that @@ -1767,7 +1755,7 @@ update results in undefined behavior: The effects of the update will be visible on the next submission of the executable graph without the need for additional user synchronization. -Preconditions: +Constraints: * This member function is only available when the `command_graph` state is `graph_state::executable`. diff --git a/sycl/doc/extensions/experimental/sycl_ext_private_alloca.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_private_alloca.asciidoc similarity index 100% rename from sycl/doc/extensions/experimental/sycl_ext_private_alloca.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_private_alloca.asciidoc diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_root_group.asciidoc similarity index 97% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_root_group.asciidoc index 44eb1b376d113..a442556173fcd 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_root_group.asciidoc @@ -52,11 +52,12 @@ This extension also depends on the following other SYCL extensions: == Status -This is a proposed extension specification, intended to gather community -feedback. Interfaces defined in this specification may not be implemented yet -or may be in a preliminary state. The specification itself may also change in -incompatible ways before it is finalized. *Shipping software products should -not rely on APIs defined in this specification.* +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* == Overview diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_forward_progress.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_forward_progress.asciidoc index 227b23f1ffdac..e99c60aa9f3aa 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_forward_progress.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_forward_progress.asciidoc @@ -254,7 +254,7 @@ of forward progress guarantee defined in the {cpp} core language specification. [source,c++] ---- -namespace ext::oneapi::experimental::sycl { +namespace sycl::ext::oneapi::experimental { enum class forward_progress_guarantee { concurrent, @@ -279,7 +279,7 @@ execution within a SYCL implementation. [source,c++] ---- -namespace ext::oneapi::experimental::sycl { +namespace sycl::ext::oneapi::experimental { enum class execution_scope { work_item, diff --git a/sycl/include/sycl/detail/string.hpp b/sycl/include/sycl/detail/string.hpp new file mode 100644 index 0000000000000..28caa74d37357 --- /dev/null +++ b/sycl/include/sycl/detail/string.hpp @@ -0,0 +1,72 @@ +//==----------------- string.hpp - SYCL standard header file ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#pragma once + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// This class and detail::string_view class are intended to support +// different ABIs between libsycl and the user program. +// This class is not inteded to replace std::string for general purpose usage. +class string { + char *str = nullptr; + +public: + string() noexcept = default; + ~string() { delete[] str; } + + string(std::string_view strn) { + size_t len = strn.length(); + str = new char[len + 1]; + strn.copy(str, len); + str[len] = 0; + } + + friend void swap(string &lhs, string &rhs) noexcept { + std::swap(lhs.str, rhs.str); + } + + string(string &&other) noexcept { swap(*this, other); } + string(const string &other) { + if (other.str == nullptr) + return; + *this = string{other.str}; + } + + string &operator=(string &&other) noexcept { + swap(*this, other); + return *this; + } + string &operator=(const string &other) { + *this = string{other}; + return *this; + } + + string &operator=(std::string_view strn) { + *this = string{strn}; + return *this; + } + + const char *c_str() const noexcept { return str ? str : ""; } + + friend bool operator==(const string &lhs, std::string_view rhs) noexcept { + return rhs == lhs.c_str(); + } + friend bool operator==(std::string_view lhs, const string &rhs) noexcept { + return lhs == rhs.c_str(); + } +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/string_view.hpp b/sycl/include/sycl/detail/string_view.hpp new file mode 100644 index 0000000000000..7815ecc4ce676 --- /dev/null +++ b/sycl/include/sycl/detail/string_view.hpp @@ -0,0 +1,52 @@ +//==-------------- string_view.hpp - SYCL standard header file -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#pragma once + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// This class and detail::string class are intended to support +// different ABIs between libsycl and the user program. +// This class is not inteded to replace std::string_view for general purpose +// usage. +class string_view { + const char *str = nullptr; + +public: + string_view() noexcept = default; + string_view(const string_view &strn) noexcept = default; + string_view(string_view &&strn) noexcept = default; + string_view(std::string_view strn) noexcept : str(strn.data()) {} + + string_view &operator=(string_view &&strn) noexcept = default; + string_view &operator=(const string_view &strn) noexcept = default; + + string_view &operator=(std::string_view strn) noexcept { + str = strn.data(); + return *this; + } + + const char *data() const noexcept { return str; } + + friend bool operator==(const string_view &lhs, + std::string_view rhs) noexcept { + return rhs == lhs.data(); + } + friend bool operator==(std::string_view lhs, + const string_view &rhs) noexcept { + return lhs == rhs.data(); + } +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/util.hpp b/sycl/include/sycl/detail/util.hpp index 1ce0a059bee19..4b831e2e38e8f 100644 --- a/sycl/include/sycl/detail/util.hpp +++ b/sycl/include/sycl/detail/util.hpp @@ -11,7 +11,9 @@ #ifndef __SYCL_DEVICE_ONLY #include - +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#include +#endif #include #include #include @@ -67,6 +69,26 @@ struct CmpCStr { using SerializedObj = std::vector; +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +template struct ABINeutralT { using type = T; }; +// We need special handling of std::string to handle ABI incompatibility +// for get_info<>() when it returns std::string and vector. +// For this purpose, get_info_impl<>() is created to handle special +// cases, and it is only called internally and not exposed to the user. +// The following ReturnType structure is intended for general return type, +// and special return types (std::string and vector of it). + +template <> struct ABINeutralT { using type = detail::string; }; + +template <> struct ABINeutralT> { + using type = std::vector; +}; + +template using ABINeutralT_t = typename ABINeutralT::type; +#else +template using ABINeutralT_t = T; +#endif + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 03b5a8f10ca33..df265e1585e83 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -8,24 +8,30 @@ #pragma once -#include // for aspect -#include // for backend -#include // for __SY... -#include // for __SY... -#include // for is_d... -#include // for Owne... -#include // for pi_n... -#include // for Enab... -#include // for arch... -#include // for part... -#include // for plat... - -#include // for size_t -#include // for shar... -#include // for string -#include // for add_... -#include // for hash -#include // for vector +#include +#include +#include +#include +#include +#include +#include +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#include +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include namespace sycl { inline namespace _V1 { @@ -214,8 +220,17 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { /// type associated with the param parameter. /// /// \return device info of type described in Table 4.20. +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + template + typename detail::is_device_info_desc::return_type get_info() const { + return detail::convert_from_abi_neutral(get_info_impl()); + } +#else template - typename detail::is_device_info_desc::return_type get_info() const; + detail::ABINeutralT_t< + typename detail::is_device_info_desc::return_type> + get_info() const; +#endif /// Check SYCL extension support by device /// @@ -291,6 +306,13 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { template friend auto get_native(const SyclObjectT &Obj) -> backend_return_t; + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + template + typename detail::ABINeutralT_t< + typename detail::is_device_info_desc::return_type> + get_info_impl() const; +#endif }; } // namespace _V1 diff --git a/sycl/include/sycl/exception.hpp b/sycl/include/sycl/exception.hpp index f5c52ff3d8257..59bbd9bcdf774 100644 --- a/sycl/include/sycl/exception.hpp +++ b/sycl/include/sycl/exception.hpp @@ -74,11 +74,21 @@ class __SYCL_EXPORT exception : public virtual std::exception { exception(std::error_code, const char *Msg); +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + exception(std::error_code Ec, const std::string &Msg) + : exception(Ec, nullptr, Msg.c_str()) {} +#else exception(std::error_code, const std::string &Msg); +#endif // new SYCL 2020 constructors exception(std::error_code); +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + exception(int EV, const std::error_category &ECat, const std::string &WhatArg) + : exception(EV, ECat, WhatArg.c_str()) {} +#else exception(int, const std::error_category &, const std::string &); +#endif exception(int, const std::error_category &, const char *); exception(int, const std::error_category &); @@ -111,13 +121,13 @@ class __SYCL_EXPORT exception : public virtual std::exception { protected: // base constructors used by SYCL 1.2.1 exception subclasses - exception(std::error_code ec, const char *Msg, const pi_int32 PIErr, + exception(std::error_code Ec, const char *Msg, const pi_int32 PIErr, std::shared_ptr Context = nullptr) - : exception(ec, std::string(Msg), PIErr, Context) {} + : exception(Ec, std::string(Msg), PIErr, Context) {} - exception(std::error_code ec, const std::string &Msg, const pi_int32 PIErr, + exception(std::error_code Ec, const std::string &Msg, const pi_int32 PIErr, std::shared_ptr Context = nullptr) - : exception(ec, Context, Msg + " " + detail::codeToString(PIErr)) { + : exception(Ec, Context, Msg + " " + detail::codeToString(PIErr)) { MPIErr = PIErr; } @@ -125,10 +135,18 @@ class __SYCL_EXPORT exception : public virtual std::exception { : MMsg(std::make_shared(Msg)), MContext(nullptr) {} // base constructor for all SYCL 2020 constructors - // exception(context *ctxPtr, std::error_code ec, const std::string + // exception(context *ctxPtr, std::error_code Ec, const std::string // &what_arg); - exception(std::error_code ec, std::shared_ptr SharedPtrCtx, +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + exception(std::error_code Ec, std::shared_ptr SharedPtrCtx, + const std::string &what_arg) + : exception(Ec, SharedPtrCtx, what_arg.c_str()) {} + exception(std::error_code Ec, std::shared_ptr SharedPtrCtx, + const char *WhatArg); +#else + exception(std::error_code Ec, std::shared_ptr SharedPtrCtx, const std::string &what_arg); +#endif }; class __SYCL2020_DEPRECATED( @@ -143,12 +161,12 @@ class __SYCL2020_DEPRECATED( runtime_error(const std::string &Msg, pi_int32 Err) : exception(make_error_code(errc::runtime), Msg, Err) {} - runtime_error(std::error_code ec, const std::string &Msg, + runtime_error(std::error_code Ec, const std::string &Msg, const pi_int32 PIErr) - : exception(ec, Msg, PIErr) {} + : exception(Ec, Msg, PIErr) {} protected: - runtime_error(std::error_code ec) : exception(ec) {} + runtime_error(std::error_code Ec) : exception(Ec) {} }; class __SYCL2020_DEPRECATED("use sycl::exception with sycl::errc::kernel or " @@ -230,10 +248,10 @@ class __SYCL2020_DEPRECATED( : exception(make_error_code(errc::invalid), Msg, Err) {} protected: - device_error(std::error_code ec) : exception(ec) {} + device_error(std::error_code Ec) : exception(Ec) {} - device_error(std::error_code ec, const std::string &Msg, const pi_int32 PIErr) - : exception(ec, Msg, PIErr) {} + device_error(std::error_code Ec, const std::string &Msg, const pi_int32 PIErr) + : exception(Ec, Msg, PIErr) {} }; class __SYCL2020_DEPRECATED( diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 7a6371624b3ef..51e2f41de9d75 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -21,6 +21,10 @@ #include #include #include +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#include +#include +#endif #include #include #include @@ -415,6 +419,10 @@ template bool range_size_fits_in_size_t(const range &r) { } return true; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +using string = std::string; +using string_view = std::string; +#endif } // namespace detail @@ -543,7 +551,7 @@ class __SYCL_EXPORT handler { bool IsKernelCreatedFromSource, bool IsESIMD); /// \return a string containing name of SYCL kernel. - std::string getKernelName(); + detail::string getKernelName(); template bool lambdaAndKernelHaveEqualName() { // TODO It is unclear a kernel and a lambda/functor must to be equal or not @@ -553,8 +561,8 @@ class __SYCL_EXPORT handler { // values of arguments for the kernel. assert(MKernel && "MKernel is not initialized"); const std::string LambdaName = detail::KernelInfo::getName(); - const std::string KernelName = getKernelName(); - return LambdaName == KernelName; + detail::string KernelName = getKernelName(); + return KernelName == LambdaName; } /// Saves the location of user's code passed in \p CodeLoc for future usage in @@ -837,7 +845,14 @@ class __SYCL_EXPORT handler { /// /// \param KernelName is the name of the SYCL kernel to check that the used /// kernel bundle contains. +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + void verifyUsedKernelBundle(const std::string &KernelName) { + verifyUsedKernelBundleInternal(detail::string_view{KernelName}); + } + void verifyUsedKernelBundleInternal(detail::string_view KernelName); +#else void verifyUsedKernelBundle(const std::string &KernelName); +#endif /// Stores lambda to the template-free object /// @@ -3307,7 +3322,7 @@ class __SYCL_EXPORT handler { std::vector MAssociatedAccesors; /// Struct that encodes global size, local size, ... detail::NDRDescT MNDRDesc; - std::string MKernelName; + detail::string MKernelName; /// Storage for a sycl::kernel object. std::shared_ptr MKernel; /// Type of the command group, e.g. kernel, fill. Can also encode version. @@ -3409,8 +3424,17 @@ class __SYCL_EXPORT handler { /// expr m_Storage member /// \param Size the size of data getting read back / to. /// \param Block if read operation is blocking, default to false. +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + void ext_intel_read_host_pipe(const std::string &Name, void *Ptr, size_t Size, + bool Block = false) { + ext_intel_read_host_pipe(detail::string_view(Name), Ptr, Size, Block); + } + void ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, + size_t Size, bool Block = false); +#else void ext_intel_read_host_pipe(const std::string &Name, void *Ptr, size_t Size, bool Block = false); +#endif /// Write to host pipes given a host address and /// \param Name name of the host pipe to be passed into lower level runtime @@ -3418,8 +3442,17 @@ class __SYCL_EXPORT handler { /// expr m_Storage member /// \param Size the size of data getting read back / to. /// \param Block if write opeartion is blocking, default to false. +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES void ext_intel_write_host_pipe(const std::string &Name, void *Ptr, + size_t Size, bool Block = false) { + ext_intel_write_host_pipe(detail::string_view(Name), Ptr, Size, Block); + } + void ext_intel_write_host_pipe(detail::string_view Name, void *Ptr, size_t Size, bool Block = false); +#else + void ext_intel_write_host_pipe(const std::string &Name, void *Ptr, + size_t Size, bool Block = false); +#endif friend class ext::oneapi::experimental::detail::graph_impl; bool DisableRangeRounding(); diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index d48cf27b3d531..6114defc47f21 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -15,10 +15,13 @@ #include // for OwnerLessBase #include // for pi_native_handle #include // for cast -#include // for device -#include // for kernel, kernel_bundle -#include // for bundle_state -#include // for property_list +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#include +#endif +#include // for device +#include // for kernel, kernel_bundle +#include // for bundle_state +#include // for property_list #include // PropertyT #include // build_options @@ -447,9 +450,12 @@ kernel_bundle(kernel_bundle &&) -> kernel_bundle; ///////////////////////// namespace detail { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +using string_view = std::string; +#endif // Internal non-template versions of get_kernel_id API which is used by public // onces -__SYCL_EXPORT kernel_id get_kernel_id_impl(std::string KernelName); +__SYCL_EXPORT kernel_id get_kernel_id_impl(string_view KernelName); } // namespace detail /// \returns the kernel_id associated with the KernelName @@ -457,7 +463,11 @@ template kernel_id get_kernel_id() { // FIXME: This must fail at link-time if KernelName not in any available // translation units. using KI = sycl::detail::KernelInfo; +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + return detail::get_kernel_id_impl(detail::string_view{KI::getName()}); +#else return detail::get_kernel_id_impl(KI::getName()); +#endif } /// \returns a vector with all kernel_id's defined in the application diff --git a/sycl/include/sycl/platform.hpp b/sycl/include/sycl/platform.hpp index 8cffdcd99e67e..32a93acd0a873 100644 --- a/sycl/include/sycl/platform.hpp +++ b/sycl/include/sycl/platform.hpp @@ -8,26 +8,31 @@ #pragma once -#include // for aspect -#include // for backend, backend_return_t -#include // for context -#include // for __SYCL2020_DEPRECATED -#include // for __SYCL_EXPORT -#include // for is_platform_info_desc -#include // for OwnerLessBase -#include // for pi_native_handle -#include // for EnableIfSYCL2020DeviceS... -#include // for device_type +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#include +#include +#endif +#include +#include +#include #ifdef __SYCL_INTERNAL_API #include #endif -#include // for size_t -#include // for shared_ptr, hash, opera... -#include // for string -#include // for hash -#include // for vector +#include +#include +#include +#include +#include namespace sycl { inline namespace _V1 { @@ -51,6 +56,47 @@ class platform_impl; /// /// \param Val Indicates if extension should be enabled/disabled void __SYCL_EXPORT enable_ext_oneapi_default_context(bool Val); + +template auto convert_to_abi_neutral(ParamT &&Info) { +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + using ParamNoRef = std::remove_reference_t; + if constexpr (std::is_same_v) { + return detail::string{Info}; + } else if constexpr (std::is_same_v>) { + std::vector Res; + Res.reserve(Info.size()); + for (std::string &Str : Info) { + Res.push_back(detail::string{Str}); + } + return Res; + } else { + return std::forward(Info); + } +#else + return std::forward(Info); +#endif +} + +template auto convert_from_abi_neutral(ParamT &&Info) { +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + using ParamNoRef = std::remove_reference_t; + if constexpr (std::is_same_v) { + return Info.c_str(); + } else if constexpr (std::is_same_v>) { + std::vector Res; + Res.reserve(Info.size()); + for (detail::string &Str : Info) { + Res.push_back(Str.c_str()); + } + return Res; + } else { + return std::forward(Info); + } +#else + return std::forward(Info); +#endif +} } // namespace detail namespace ext::oneapi { // Forward declaration @@ -144,9 +190,17 @@ class __SYCL_EXPORT platform : public detail::OwnerLessBase { /// Queries this SYCL platform for info. /// /// The return type depends on information being queried. +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES template - typename detail::is_platform_info_desc::return_type get_info() const; - + typename detail::is_platform_info_desc::return_type get_info() const { + return detail::convert_from_abi_neutral(get_info_impl()); + } +#else + template + detail::ABINeutralT_t< + typename detail::is_platform_info_desc::return_type> + get_info() const; +#endif /// Returns all available SYCL platforms in the system. /// /// The resulting vector always contains a single SYCL host platform instance. @@ -206,6 +260,13 @@ class __SYCL_EXPORT platform : public detail::OwnerLessBase { template friend auto get_native(const SyclObjectT &Obj) -> backend_return_t; + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + template + typename detail::ABINeutralT_t< + typename detail::is_platform_info_desc::return_type> + get_info_impl() const; +#endif }; // class platform } // namespace _V1 } // namespace sycl diff --git a/sycl/plugins/native_cpu/CMakeLists.txt b/sycl/plugins/native_cpu/CMakeLists.txt index eaad9c821c46b..b602f7cfc4c3d 100644 --- a/sycl/plugins/native_cpu/CMakeLists.txt +++ b/sycl/plugins/native_cpu/CMakeLists.txt @@ -28,12 +28,13 @@ if(NATIVECPU_USE_OCK) include(FetchContent) FetchContent_Declare(oneapi-ck GIT_REPOSITORY https://github.com/codeplaysoftware/oneapi-construction-kit.git - # commit 63f4ba99fc758ffc4268a807b21816b6be1b1b68 - # Author: PietroGhg <38155419+PietroGhg@users.noreply.github.com> - # Date: Wed Jan 31 14:06:36 2024 +0100 - # Merge pull request #326 from PietroGhg/pietro/vecz - # Integrate vecz in experimental branch - GIT_TAG 63f4ba99fc758ffc4268a807b21816b6be1b1b68 + # commit 62090f9d469a30e0595f445ff664ea2ecfe77b44 + # Merge: 6151546a af33ddfb + # Author: PietroGhg + # Date: Thu Feb 29 12:46:49 2024 +0100 + # Merge pull request #385 from PietroGhg/pietro/multi_llvm_include + # [NATIVECPU] Avoid specifying include path manually for multi_llvm and Native CPU + GIT_TAG 62090f9d469a30e0595f445ff664ea2ecfe77b44 ) FetchContent_GetProperties(oneapi-ck) if(NOT oneapi-ck_POPULATED) @@ -43,6 +44,12 @@ if(NATIVECPU_USE_OCK) set(CA_NATIVE_CPU 1) add_subdirectory(${oneapi-ck_SOURCE_DIR} ${oneapi-ck_BINARY_DIR} EXCLUDE_FROM_ALL) endif() + install(TARGETS multi_llvm + EXPORT;LLVMExports + LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT multi_llvm + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT multi_llvm + RUNTIME DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT multi_llvm) + set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS multi_llvm) target_compile_definitions(LLVMSYCLLowerIR PRIVATE NATIVECPU_USE_OCK) target_include_directories(LLVMSYCLLowerIR PRIVATE ${oneapi-ck_SOURCE_DIR}/modules/compiler/multi_llvm/include diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 6e4bdac038fda..d3c2b333844fe 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,13 +57,14 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 9b936b5748a8da86123f3f148d1cfa84e52531c4 - # Merge: 91c6068b 767cfd1a - # Author: Piotr Balcer - # Date: Mon Feb 26 10:46:55 2024 +0100 - # Merge pull request #1354 from AllanZyne/sanitizer-device-global - # [DeviceSanitizer] Support detecting out-of-bounds error on DeviceGlobals - set(UNIFIED_RUNTIME_TAG 9b936b5748a8da86123f3f148d1cfa84e52531c4) + + # commit a2757b2931daa2f8d7c9dd51b0fc846be1fd49a7 + # Merge: 9b936b5 + f78d369 + # Author: Kenneth Benzie (Benie) + # Date: Tue Feb 27 11:34:58 2024 +0000 + # Merge pull request #1254 from Bensuo/cmdbuf-support-hip + # [EXP][CMDBUF] HIP adapter support for command buffers + set(UNIFIED_RUNTIME_TAG a2757b2931daa2f8d7c9dd51b0fc846be1fd49a7 ) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 1fdf229860022..633a8227873ad 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -270,6 +270,13 @@ template <> class SYCLConfig { } const char *ValStr = BaseT::getRawValue(); if (ValStr) { + // Throw if the input string is empty. + if (ValStr[0] == '\0') + throw invalid_parameter_error( + "Invalid value for ONEAPI_DEVICE_SELECTOR environment " + "variable: value should not be null.", + PI_ERROR_INVALID_VALUE); + DeviceTargets = &GlobalHandler::instance().getOneapiDeviceSelectorTargets(ValStr); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c6b7be77803c7..ad06891ebe568 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -836,10 +836,9 @@ class queue_impl { if (IsKernel) // Kernel only uses assert if it's non interop one - KernelUsesAssert = - !(Handler.MKernel && Handler.MKernel->isInterop()) && - ProgramManager::getInstance().kernelUsesAssert(Handler.MKernelName); - + KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) && + ProgramManager::getInstance().kernelUsesAssert( + Handler.MKernelName.c_str()); finalizeHandler(Handler, Event); (*PostProcess)(IsKernel, KernelUsesAssert, Event); diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 73a05080c5b0d..2bf76a3fa9407 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -15,8 +15,6 @@ #include #include -#include - namespace sycl { inline namespace _V1 { namespace detail { @@ -135,14 +133,23 @@ bool device::has_extension(const std::string &extension_name) const { } template -typename detail::is_device_info_desc::return_type +detail::ABINeutralT_t::return_type> +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +device::get_info_impl() const { +#else device::get_info() const { - return impl->template get_info(); +#endif + return detail::convert_to_abi_neutral(impl->template get_info()); } // Explicit override. Not fulfilled by #include device_traits.def below. template <> +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +__SYCL_EXPORT device +device::get_info_impl() const { +#else __SYCL_EXPORT device device::get_info() const { +#endif // With ONEAPI_DEVICE_SELECTOR the impl.MRootDevice is preset and may be // overridden (ie it may be nullptr on a sub-device) The PI of the sub-devices // have parents, but we don't want to return them. They must pretend to be @@ -157,7 +164,11 @@ __SYCL_EXPORT device device::get_info() const { template <> __SYCL_EXPORT std::vector +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +device::get_info_impl() const { +#else device::get_info() const { +#endif std::vector DeviceAspects{ #define __SYCL_ASPECT(ASPECT, ID) aspect::ASPECT, #include @@ -181,14 +192,25 @@ device::get_info() const { } template <> +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +__SYCL_EXPORT bool device::get_info_impl() const { +#else __SYCL_EXPORT bool device::get_info() const { +#endif // Explicit specialization is needed due to the class of info handle. The // implementation is done in get_device_info_impl. return impl->template get_info(); } +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template __SYCL_EXPORT ReturnT device::get_info() const; + template __SYCL_EXPORT detail::ABINeutralT_t \ + device::get_info_impl() const; +#else +#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ + template __SYCL_EXPORT detail::ABINeutralT_t \ + device::get_info() const; +#endif #define __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(DescType, Desc, ReturnT, PiCode) @@ -196,9 +218,15 @@ __SYCL_EXPORT bool device::get_info() const { #undef __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED #undef __SYCL_PARAM_TRAITS_SPEC +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES #define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \ - template __SYCL_EXPORT ReturnT \ + template __SYCL_EXPORT detail::ABINeutralT_t \ + device::get_info_impl() const; +#else +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \ + template __SYCL_EXPORT typename detail::ABINeutralT_t \ device::get_info() const; +#endif #include #include diff --git a/sycl/source/exception.cpp b/sycl/source/exception.cpp index 88141f613fd53..ad1d39e3dae18 100644 --- a/sycl/source/exception.cpp +++ b/sycl/source/exception.cpp @@ -19,15 +19,19 @@ inline namespace _V1 { exception::exception(std::error_code EC, const char *Msg) : exception(EC, nullptr, Msg) {} +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES exception::exception(std::error_code EC, const std::string &Msg) : exception(EC, nullptr, Msg) {} +#endif // new SYCL 2020 constructors exception::exception(std::error_code EC) : exception(EC, nullptr, "") {} +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES exception::exception(int EV, const std::error_category &ECat, const std::string &WhatArg) : exception({EV, ECat}, nullptr, WhatArg) {} +#endif exception::exception(int EV, const std::error_category &ECat, const char *WhatArg) @@ -58,12 +62,21 @@ exception::exception(context Ctx, int EV, const std::error_category &ECat) : exception(Ctx, EV, ECat, "") {} // protected base constructor for all SYCL 2020 constructors +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +exception::exception(std::error_code EC, std::shared_ptr SharedPtrCtx, + const char *WhatArg) + : MMsg(std::make_shared(WhatArg)), + MPIErr(PI_ERROR_INVALID_VALUE), MContext(SharedPtrCtx), MErrC(EC) { + detail::GlobalHandler::instance().TraceEventXPTI(MMsg->c_str()); +} +#else exception::exception(std::error_code EC, std::shared_ptr SharedPtrCtx, const std::string &WhatArg) : MMsg(std::make_shared(WhatArg)), MPIErr(PI_ERROR_INVALID_VALUE), MContext(SharedPtrCtx), MErrC(EC) { detail::GlobalHandler::instance().TraceEventXPTI(MMsg->c_str()); } +#endif exception::~exception() {} diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 94baf075b8c23..5b40f1b3b07c3 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -173,7 +173,12 @@ event handler::finalize() { !MImpl->isStateExplicitKernelBundle()) { auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device(); kernel_id KernelID = +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + detail::ProgramManager::getInstance().getSYCLKernelID( + MKernelName.c_str()); +#else detail::ProgramManager::getInstance().getSYCLKernelID(MKernelName); +#endif bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev); // If kernel was not inserted and the bundle is in input mode we try // building it and trying to find the kernel in executable mode @@ -232,7 +237,11 @@ event handler::finalize() { // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent, int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData( +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, MNDRDesc, +#else StreamID, MKernel, MCodeLoc, MKernelName, MQueue, MNDRDesc, +#endif KernelBundleImpPtr, MArgs); auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent, InstanceID = InstanceID]() { @@ -282,7 +291,11 @@ event handler::finalize() { } else { Result = enqueueImpKernel( MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + MKernelName.c_str(), RawEvents, NewEvent, nullptr, +#else MKernelName, RawEvents, NewEvent, nullptr, +#endif MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); } } @@ -304,7 +317,12 @@ event handler::finalize() { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + detail::ProgramManager::getInstance().kernelUsesAssert( + MKernelName.c_str()); +#else detail::ProgramManager::getInstance().kernelUsesAssert(MKernelName); +#endif DiscardEvent = !KernelUsesAssert; } @@ -340,7 +358,11 @@ event handler::finalize() { CommandGroup.reset(new detail::CGExecKernel( std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel), std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs), +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + MKernelName.c_str(), std::move(MStreamStorage), +#else MKernelName, std::move(MStreamStorage), +#endif std::move(MImpl->MAuxiliaryResources), MCGType, MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, MCodeLoc)); break; @@ -866,11 +888,15 @@ void handler::extractArgsAndReqsFromLambda( // Calling methods of kernel_impl requires knowledge of class layout. // As this is impossible in header, there's a function that calls necessary // method inside the library and returns the result. -std::string handler::getKernelName() { - return MKernel->get_info(); +detail::string handler::getKernelName() { + return detail::string{MKernel->get_info()}; } +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { +#else void handler::verifyUsedKernelBundle(const std::string &KernelName) { +#endif auto UsedKernelBundleImplPtr = getOrInsertHandlerKernelBundle(/*Insert=*/false); if (!UsedKernelBundleImplPtr) @@ -1385,9 +1411,15 @@ id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)}; } +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, + size_t Size, bool Block) { + MImpl->HostPipeName = Name.data(); +#else void handler::ext_intel_read_host_pipe(const std::string &Name, void *Ptr, size_t Size, bool Block) { MImpl->HostPipeName = Name; +#endif MImpl->HostPipePtr = Ptr; MImpl->HostPipeTypeSize = Size; MImpl->HostPipeBlocking = Block; @@ -1395,9 +1427,15 @@ void handler::ext_intel_read_host_pipe(const std::string &Name, void *Ptr, setType(detail::CG::ReadWriteHostPipe); } +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::ext_intel_write_host_pipe(detail::string_view Name, void *Ptr, + size_t Size, bool Block) { + MImpl->HostPipeName = Name.data(); +#else void handler::ext_intel_write_host_pipe(const std::string &Name, void *Ptr, size_t Size, bool Block) { MImpl->HostPipeName = Name; +#endif MImpl->HostPipePtr = Ptr; MImpl->HostPipeTypeSize = Size; MImpl->HostPipeBlocking = Block; diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index e7a2fd0902d17..5fcad0af99ca3 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -139,8 +139,13 @@ removeDuplicateDevices(const std::vector &Devs) { return UniqueDevices; } -kernel_id get_kernel_id_impl(std::string KernelName) { +kernel_id get_kernel_id_impl(string_view KernelName) { +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + return detail::ProgramManager::getInstance().getSYCLKernelID( + KernelName.data()); +#else return detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); +#endif } detail::KernelBundleImplPtr diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 5cc2a49801902..c991f27eda51b 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -57,18 +57,29 @@ std::vector platform::get_platforms() { backend platform::get_backend() const noexcept { return impl->getBackend(); } template -typename detail::is_platform_info_desc::return_type +detail::ABINeutralT_t< + typename detail::is_platform_info_desc::return_type> +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +platform::get_info_impl() const { +#else platform::get_info() const { - return impl->get_info(); +#endif + return detail::convert_to_abi_neutral(impl->template get_info()); } pi_native_handle platform::getNative() const { return impl->getNative(); } bool platform::has(aspect Aspect) const { return impl->has(Aspect); } +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template __SYCL_EXPORT ReturnT platform::get_info() \ - const; + template __SYCL_EXPORT detail::ABINeutralT_t \ + platform::get_info_impl() const; +#else +#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ + template __SYCL_EXPORT detail::ABINeutralT_t \ + platform::get_info() const; +#endif #include #undef __SYCL_PARAM_TRAITS_SPEC diff --git a/sycl/test-e2e/Basic/built-ins/helpers.hpp b/sycl/test-e2e/Basic/built-ins/helpers.hpp index 788055369650e..2cc4fac936bc8 100644 --- a/sycl/test-e2e/Basic/built-ins/helpers.hpp +++ b/sycl/test-e2e/Basic/built-ins/helpers.hpp @@ -1,12 +1,12 @@ #include -template bool equal(T x, T y, double delta) { +template bool equal(T x, T y) { // Maybe should be C++20's std::equality_comparable. if constexpr (std::is_scalar_v) { - return std::abs(x - y) <= delta; + return x == y; } else { for (size_t i = 0; i < x.size(); ++i) - if (std::abs(x[i] - y[i]) > delta) + if (x[i] != y[i]) return false; return true; @@ -15,10 +15,10 @@ template bool equal(T x, T y, double delta) { template -void test(bool CheckDevice, double delta, FuncTy F, ExpectedTy Expected, ArgTys... Args) { +void test(bool CheckDevice, FuncTy F, ExpectedTy Expected, ArgTys... Args) { auto R = F(Args...); static_assert(std::is_same_v); - assert(equal(R, Expected, delta)); + assert(equal(R, Expected)); if (!CheckDevice) return; @@ -29,7 +29,7 @@ void test(bool CheckDevice, double delta, FuncTy F, ExpectedTy Expected, ArgTys. cgh.single_task([=]() { auto R = F(Args...); static_assert(std::is_same_v); - Success[0] = equal(R, Expected, delta); + Success[0] = equal(R, Expected); }); }); assert(sycl::host_accessor{SuccessBuf}[0]); @@ -37,16 +37,7 @@ void test(bool CheckDevice, double delta, FuncTy F, ExpectedTy Expected, ArgTys. template void test(FuncTy F, ExpectedTy Expected, ArgTys... Args) { - test(true /*CheckDevice*/, 0.0 /*delta*/, F, Expected, Args...); -} -template -void test(bool CheckDevice, FuncTy F, ExpectedTy Expected, ArgTys... Args) { - test(CheckDevice, 0.0 /*delta*/, F, Expected, Args...); -} -template -void test(double delta, FuncTy F, ExpectedTy Expected, ArgTys... Args) { - test(true /*CheckDevice*/, delta, F, Expected, Args...); + test(true /*CheckDevice*/, F, Expected, Args...); } // MSVC's STL spoils global namespace with math functions, so use explicit diff --git a/sycl/test-e2e/Basic/built-ins/marray_common.cpp b/sycl/test-e2e/Basic/built-ins/marray_common.cpp index f9415802c6ecf..2c63690f39f1c 100644 --- a/sycl/test-e2e/Basic/built-ins/marray_common.cpp +++ b/sycl/test-e2e/Basic/built-ins/marray_common.cpp @@ -8,76 +8,115 @@ #endif #include -#include "helpers.hpp" +#include -int main() { - using namespace sycl; +#define TEST(FUNC, MARRAY_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) \ + assert(abs(result[i] - EXPECTED[i]) <= DELTA); \ + } \ + } - queue deviceQueue; - device dev = deviceQueue.get_device(); +#define EXPECTED(TYPE, ...) ((TYPE[]){__VA_ARGS__}) - marray ma1{1.0f, 2.0f}; - marray ma2{1.0f, 2.0f}; - marray ma3{3.0f, 2.0f}; - marray ma4{1.0, 2.0}; - marray ma5{M_PI, M_PI, M_PI}; - marray ma6{M_PI, M_PI, M_PI}; - marray ma7{M_PI, M_PI, M_PI}; - marray ma8{0.3f, 0.6f}; - marray ma9{5.0, 8.0}; - marray ma10{180, 180, 180}; - marray ma11{180, 180, 180}; - marray ma12{180, 180, 180}; - marray ma13{181, 179, 181}; - marray ma14{+0.0f, -0.6f}; - marray ma15{-0.0, 0.6f}; +int main() { + sycl::queue deviceQueue; + sycl::device dev = deviceQueue.get_device(); - bool has_fp16 = queue{}.get_device().has(sycl::aspect::fp16); - bool has_fp64 = queue{}.get_device().has(sycl::aspect::fp64); + sycl::marray ma1{1.0f, 2.0f}; + sycl::marray ma2{1.0f, 2.0f}; + sycl::marray ma3{3.0f, 2.0f}; + sycl::marray ma4{1.0, 2.0}; + sycl::marray ma5{M_PI, M_PI, M_PI}; + sycl::marray ma6{M_PI, M_PI, M_PI}; + sycl::marray ma7{M_PI, M_PI, M_PI}; + sycl::marray ma8{0.3f, 0.6f}; + sycl::marray ma9{5.0, 8.0}; + sycl::marray ma10{180, 180, 180}; + sycl::marray ma11{180, 180, 180}; + sycl::marray ma12{180, 180, 180}; + sycl::marray ma13{181, 179, 181}; + sycl::marray ma14{+0.0f, -0.6f}; + sycl::marray ma15{-0.0, 0.6f}; - // clamp - test(F(clamp), marray{1.0f, 2.0f}, ma1, ma2, ma3); - test(F(clamp), marray{1.0f, 2.0f}, ma1, 1.0f, 3.0f); - test(has_fp64, F(clamp), marray{1.0, 2.0}, ma4, 1.0, 3.0); - // degrees - test(F(degrees), marray{180, 180, 180}, ma5); - test(has_fp64, F(degrees), marray{180, 180, 180}, ma6); - test(has_fp16, 0.2, F(degrees), marray{180, 180, 180}, ma7); - // max - test(F(max), marray{3.0f, 2.0f}, ma1, ma3); - test(F(max), marray{1.5f, 2.0f}, ma1, 1.5f); - test(has_fp64, F(max), marray{1.5, 2.0}, ma4, 1.5); - // min - test(F(min), marray{1.0f, 2.0f}, ma1, ma3); - test(F(min), marray{1.0f, 1.5f}, ma1, 1.5f); - test(has_fp64, F(min), marray{1.0, 1.5}, ma4, 1.5); - // mix - test(F(mix), marray{1.6f, 2.0f}, ma1, ma3, ma8); - test(F(mix), marray{1.4f, 2.0f}, ma1, ma3, 0.2); - test(has_fp64, F(mix), marray{3.0, 5.0}, ma4, ma9, 0.5); - // radians - test(F(radians), marray{M_PI, M_PI, M_PI}, ma10); - test(has_fp64, F(radians), marray{M_PI, M_PI, M_PI}, ma11); - test(has_fp16, 0.002, F(radians), marray{M_PI, M_PI, M_PI}, ma12); - // step - test(F(step), marray{1.0f, 1.0f}, ma1, ma3); - test(has_fp64, F(step), marray{1.0, 1.0}, ma4, ma9); - test(has_fp16, F(step), marray{1.0, 0.0, 1.0}, ma12, ma13); - test(F(step), marray{1.0f, 0.0f}, 2.5f, ma3); - test(has_fp64, F(step), marray{0.0f, 1.0f}, 6.0f, ma9); - // smoothstep - test(F(smoothstep), marray{1.0f, 1.0f}, ma8, ma1, ma2); - test(has_fp64, 0.00000001, F(smoothstep), marray{1.0, 1.0f}, ma4, - ma9, ma9); - test(has_fp16, F(smoothstep), marray{1.0, 1.0, 1.0}, ma7, ma12, - ma13); - test(0.0000001, F(smoothstep), marray{0.0553936f, 0.0f}, 2.5f, 6.0f, - ma3); - test(has_fp64, F(smoothstep), marray{0.0f, 1.0f}, 6.0f, 8.0f, ma9); + // sycl::clamp + TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma2, ma3); + TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, 1.0f, 3.0f); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::clamp, double, 2, EXPECTED(double, 1.0, 2.0), 0, ma4, 1.0, 3.0); + // sycl::degrees + TEST(sycl::degrees, float, 3, EXPECTED(float, 180, 180, 180), 0, ma5); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, ma6); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, + ma7); + // sycl::max + TEST(sycl::max, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, ma1, ma3); + TEST(sycl::max, float, 2, EXPECTED(float, 1.5f, 2.0f), 0, ma1, 1.5f); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::max, double, 2, EXPECTED(double, 1.5, 2.0), 0, ma4, 1.5); + // sycl::min + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma3); + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 1.5f), 0, ma1, 1.5f); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::min, double, 2, EXPECTED(double, 1.0, 1.5), 0, ma4, 1.5); + // sycl::mix + TEST(sycl::mix, float, 2, EXPECTED(float, 1.6f, 2.0f), 0, ma1, ma3, ma8); + TEST(sycl::mix, float, 2, EXPECTED(float, 1.4f, 2.0f), 0, ma1, ma3, 0.2); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::mix, double, 2, EXPECTED(double, 3.0, 5.0), 0, ma4, ma9, 0.5); + // sycl::radians + TEST(sycl::radians, float, 3, EXPECTED(float, M_PI, M_PI, M_PI), 0, ma10); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, ma11); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), + 0.002, ma12); + // sycl::step + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, ma1, ma3); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, ma4, ma9); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, + ma12, ma13); + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, 2.5f, ma3); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, ma9); + // sycl::smoothstep + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, ma8, ma1, + ma2); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 1.0, 1.0f), 0.00000001, + ma4, ma9, ma9); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), + 0, ma7, ma12, ma13); + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0553936f, 0.0f), 0.0000001, + 2.5f, 6.0f, ma3); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, + 8.0f, ma9); // sign - test(F(sign), marray{+0.0f, -1.0f}, ma14); - test(has_fp64, F(sign), marray{-0.0, 1.0}, ma15); - test(has_fp16, F(sign), marray{1.0, 1.0, 1.0}, ma12); + TEST(sycl::sign, float, 2, EXPECTED(float, +0.0f, -1.0f), 0, ma14); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, ma15); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, + ma12); return 0; } diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64.cpp index ea5d6cf804ebf..a340edab0321c 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu-intel-pvc +// REQUIRES: gpu-intel-pvc || gpu-intel-dg2 // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -36,9 +36,12 @@ template bool tests() { int main(void) { srand(seed); bool passed = true; + auto Q = queue{gpu_selector_v}; passed &= tests<0, uint64_t>(); - passed &= tests<11, double>(); + if (Q.get_device().has(sycl::aspect::fp64)) { + passed &= tests<11, double>(); + } std::cout << (passed ? "Passed\n" : "FAILED\n"); return passed ? 0 : 1; diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64_stateless.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64_stateless.cpp index dba135e798c3e..a39ea4d865bbb 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64_stateless.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_surf_store_u64_stateless.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu-intel-pvc +// REQUIRES: gpu-intel-pvc || gpu-intel-dg2 // RUN: %{build} -fsycl-esimd-force-stateless-mem -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint.cpp index d797149205983..a6547ef539d24 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu-intel-pvc +// REQUIRES: gpu-intel-pvc || gpu-intel-dg2 // TODO : Test uses 'kernel_bundle' that is not supported in ESIMD_EMULATOR // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint_64.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint_64.cpp index d9b07693ef028..06849c1e08876 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint_64.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_usm_atomic_cachehint_64.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu-intel-pvc +// REQUIRES: gpu-intel-pvc || gpu-intel-dg2 // TODO : Test uses 'kernel_bundle' that is not supported in ESIMD_EMULATOR // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64.cpp index c777b897df925..b3e8516670d28 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu-intel-pvc +// REQUIRES: gpu-intel-pvc || gpu-intel-dg2 // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -13,12 +13,14 @@ int main(void) { constexpr uint32_t Seed = 185; + auto Q = queue{gpu_selector_v}; srand(Seed); bool Passed = true; Passed &= test_lsc_gather(); - Passed &= test_lsc_gather(); - + if (Q.get_device().has(sycl::aspect::fp64)) { + Passed &= test_lsc_gather(); + } std::cout << (Passed ? "Passed\n" : "FAILED\n"); return Passed ? 0 : 1; } diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64_64.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64_64.cpp index cb3c1518bcc85..2ee1e701de138 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64_64.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_usm_gather_u64_64.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu-intel-pvc +// REQUIRES: gpu-intel-pvc || gpu-intel-dg2 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Graph/Explicit/enqueue_ordering.cpp b/sycl/test-e2e/Graph/Explicit/enqueue_ordering.cpp index 04da3eaa5414b..da3ccd2329ec9 100644 --- a/sycl/test-e2e/Graph/Explicit/enqueue_ordering.cpp +++ b/sycl/test-e2e/Graph/Explicit/enqueue_ordering.cpp @@ -1,3 +1,4 @@ +// REQUIRES: aspect-usm_shared_allocations // RUN: %{build} -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG diff --git a/sycl/test-e2e/Graph/Explicit/work_group_size_prop.cpp b/sycl/test-e2e/Graph/Explicit/work_group_size_prop.cpp index 9a4366d1f1d3d..d1eb9ef35c355 100644 --- a/sycl/test-e2e/Graph/Explicit/work_group_size_prop.cpp +++ b/sycl/test-e2e/Graph/Explicit/work_group_size_prop.cpp @@ -11,10 +11,9 @@ // should. So the Sycl graph support cannot correctly catch the error and throw // the approriate exception for negative test. An issue has been reported // https://github.com/bashbaug/SimpleOpenCLSamples/issues/95 -// XFAIL: cuda +// XFAIL: cuda, hip // UNSUPPORTED: opencl // Note: failing negative test with HIP in the original test -// TODO: disable hip when HIP backend will be supported by Graph #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/RecordReplay/work_group_size_prop.cpp b/sycl/test-e2e/Graph/RecordReplay/work_group_size_prop.cpp index d9007264aa3e8..d4bd94be66ce0 100644 --- a/sycl/test-e2e/Graph/RecordReplay/work_group_size_prop.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/work_group_size_prop.cpp @@ -11,10 +11,9 @@ // should. So the Sycl graph support cannot correctly catch the error and throw // the approriate exception for negative test. An issue has been reported // https://github.com/bashbaug/SimpleOpenCLSamples/issues/95 -// XFAIL: cuda +// XFAIL: cuda, hip // UNSUPPORTED: opencl // Note: failing negative test with HIP in the original test -// TODO: disable hip when HIP backend will be supported by Graph #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/device_query.cpp b/sycl/test-e2e/Graph/device_query.cpp index f1f2e680c323a..3513aa74fa5b7 100644 --- a/sycl/test-e2e/Graph/device_query.cpp +++ b/sycl/test-e2e/Graph/device_query.cpp @@ -20,7 +20,8 @@ int main() { auto Backend = Device.get_backend(); if ((Backend == backend::ext_oneapi_level_zero) || - (Backend == backend::ext_oneapi_cuda)) { + (Backend == backend::ext_oneapi_cuda) || + (Backend == backend::ext_oneapi_hip)) { assert(SupportsGraphs == exp_ext::graph_support_level::native); } else if (Backend == backend::opencl) { // OpenCL backend support is conditional on the cl_khr_command_buffer diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp index f672b75c5a57a..7040058dc8554 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp @@ -8,8 +8,11 @@ // REQUIRES: matrix // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943 -// RUN: %{build} -o %t.out -ffp-model=precise -// RUN: %{run} %t.out +// RUN: %{build} -o %t_gpu.out -ffp-model=precise +// RUN: %if gpu %{ %{run} %t_gpu.out %} + +// RUN: %{build} -ffp-model=precise -o %t_cpu.out -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32 +// RUN: %if cpu %{ %{run} %t_cpu.out %} // -ffp-model=precise is added to not depend on compiler defaults. diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp index 27ca26b32d396..0c93876db2a15 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp @@ -7,8 +7,11 @@ //===----------------------------------------------------------------------===// // REQUIRES: matrix -// RUN: %{build} -o %t.out -ffp-model=precise -// RUN: %{run} %t.out +// RUN: %{build} -o %t_gpu.out -ffp-model=precise +// RUN: %if gpu %{ %{run} %t_gpu.out %} + +// RUN: %{build} -ffp-model=precise -o %t_cpu.out -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32 +// RUN: %if cpu %{ %{run} %t_cpu.out %} // -ffp-model=precise is added to not depend on compiler defaults. diff --git a/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp b/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp index 59129cf820abe..e21915ad9b23f 100644 --- a/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp +++ b/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp @@ -9,6 +9,7 @@ // RUN: env ONEAPI_DEVICE_SELECTOR="level_zero:" %{run-unfiltered-devices} %t.out // RUN: env ONEAPI_DEVICE_SELECTOR="level_zero:::gpu" %{run-unfiltered-devices} %t.out // RUN: env ONEAPI_DEVICE_SELECTOR="level_zero:.1" %{run-unfiltered-devices} %t.out +// RUN: env ONEAPI_DEVICE_SELECTOR="" %{run-unfiltered-devices} %t.out // XFAIL: * // Calling ONEAPI_DEVICE_SELECTOR with an illegal input should result in an diff --git a/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp b/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp index 00091b4e257c4..97522316cce28 100644 --- a/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp +++ b/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp @@ -23,7 +23,6 @@ int main(int argc, char *argv[]) { // DEVICE: ---> piextUSMDeviceAlloc // DEVICE: ZE ---> zeMemAllocDevice // DEVICE: ZE ---> zeContextMakeMemoryResident - // DEVICE-NOT: ZE ---> zeContextMakeMemoryResident auto ptr2 = malloc_shared(1, Q); // SHARED: ---> piextUSMSharedAlloc diff --git a/sycl/test/basic_tests/device-selectors-exception.cpp b/sycl/test/basic_tests/device-selectors-exception.cpp index 5bddf0ed43454..2256b7f4745b8 100644 --- a/sycl/test/basic_tests/device-selectors-exception.cpp +++ b/sycl/test/basic_tests/device-selectors-exception.cpp @@ -1,7 +1,9 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_FILTER="" %t.out // RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fsycl-targets=%sycl_triple -fpreview-breaking-changes %s -o %t.out %} -// RUN: %if preview-breaking-changes-supported %{ env ONEAPI_DEVICE_SELECTOR="" %t.out %} +// ONEAPI_DEVICE_SELECTOR="*:-1" causes this test to not select any device at +// all. +// RUN: %if preview-breaking-changes-supported %{ env ONEAPI_DEVICE_SELECTOR="*:-1" %t.out %} #include using namespace sycl; diff --git a/sycl/test/check_device_code/native_cpu/vectorization.cpp b/sycl/test/check_device_code/native_cpu/vectorization.cpp index a6b5211bff4f7..07e7a92805935 100644 --- a/sycl/test/check_device_code/native_cpu/vectorization.cpp +++ b/sycl/test/check_device_code/native_cpu/vectorization.cpp @@ -1,4 +1,4 @@ -// REQUIRES: native_cpu_be +// REQUIRES: native_cpu_ock // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -S -emit-llvm -o %t_temp.ll %s // RUN: %clangxx -O2 -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s --check-prefix=CHECK-DEFAULT // RUN: %clangxx -O2 -mllvm -sycl-native-cpu-backend -mllvm -sycl-native-cpu-vecz-width=16 -S -emit-llvm -o - %t_temp.ll | FileCheck %s --check-prefix=CHECK-16 diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 3b8ea40363d6b..104a4bd6c9d22 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -159,6 +159,9 @@ if config.native_cpu_be == "ON": config.available_features.add("native_cpu_be") +if config.native_cpu_ock == "ON": + config.available_features.add("native_cpu_ock") + if "nvptx64-nvidia-cuda" in triple: llvm_config.with_system_environment("CUDA_PATH") config.available_features.add("cuda") diff --git a/sycl/test/lit.site.cfg.py.in b/sycl/test/lit.site.cfg.py.in index 884aa2f0a4204..98e6fadfea77b 100644 --- a/sycl/test/lit.site.cfg.py.in +++ b/sycl/test/lit.site.cfg.py.in @@ -31,6 +31,7 @@ config.hip_be = '@SYCL_BUILD_PI_HIP@' config.opencl_be = '@SYCL_BUILD_PI_OPENCL@' config.level_zero_be = '@SYCL_BUILD_PI_LEVEL_ZERO@' config.native_cpu_be = '@SYCL_BUILD_NATIVE_CPU@' +config.native_cpu_ock = '@NATIVECPU_USE_OCK@' config.sycl_preview_lib_enabled = '@SYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB@' import lit.llvm diff --git a/sycl/test/native_cpu/barrier-external.cpp b/sycl/test/native_cpu/barrier-external.cpp index 224e1babf43f4..5f3e9358e9309 100644 --- a/sycl/test/native_cpu/barrier-external.cpp +++ b/sycl/test/native_cpu/barrier-external.cpp @@ -1,4 +1,4 @@ -// REQUIRES: native_cpu_be +// REQUIRES: native_cpu_ock // RUN: %clangxx -DFILE1 -fsycl -fsycl-targets=native_cpu %s -g -c -o %t1.o // RUN: %clangxx -DFILE2 -fsycl -fsycl-targets=native_cpu %s -g -c -o %t2.o // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %t1.o %t2.o -g -o %t diff --git a/sycl/test/native_cpu/barrier-simple.cpp b/sycl/test/native_cpu/barrier-simple.cpp index c2ba2fc044724..6b3f6e5e05f48 100644 --- a/sycl/test/native_cpu/barrier-simple.cpp +++ b/sycl/test/native_cpu/barrier-simple.cpp @@ -1,4 +1,4 @@ -// REQUIRES: native_cpu_be +// REQUIRES: native_cpu_ock // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index b3b27a502bcee..f9f580022ccad 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -122,7 +122,11 @@ class MockHandler : public sycl::handler { std::move(CGH->MNDRDesc), std::move(CGH->MHostKernel), std::move(CGH->MKernel), std::move(MImpl->MKernelBundle), std::move(CGH->CGData), std::move(CGH->MArgs), +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + CGH->MKernelName.c_str(), std::move(CGH->MStreamStorage), +#else std::move(CGH->MKernelName), std::move(CGH->MStreamStorage), +#endif std::move(MImpl->MAuxiliaryResources), CGH->MCGType, {}, MImpl->MKernelIsCooperative, CGH->MCodeLoc)); break; diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index a1d1eb8058826..2a2f20c297a25 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -258,7 +258,11 @@ class MockHandler : public sycl::handler { return CGData.MEvents; } std::vector &getArgs() { return MArgs; } +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + std::string getKernelName() { return MKernelName.c_str(); } +#else std::string &getKernelName() { return MKernelName; } +#endif std::shared_ptr &getKernel() { return MKernel; } std::unique_ptr &getHostTask() { return MHostTask; } std::shared_ptr &getQueue() { return MQueue; }