From 68445467cef311e41e3b4b1398b50a2943db9221 Mon Sep 17 00:00:00 2001
From: Dmitry Vodopyanov
Date: Wed, 6 Mar 2024 13:31:43 +0100
Subject: [PATCH] [SYCL] Implement device arch comparison according to
 sycl_ext_oneapi_device_architecture spec update (#12752)

This patch implements the device AOT and host APIs for device architecture
comparison in accordance with the [spec update](https://github.com/intel/llvm/pull/12259).
---
 sycl/include/sycl/device.hpp                  |  11 +
 .../experimental/device_architecture.hpp      | 969 +++++++++++++-----
 sycl/source/detail/device_impl.hpp            |  13 +
 sycl/source/device.cpp                        |   6 +
 ..._architecture_comparison_on_device_aot.cpp | 199 ++++
 ...device_architecture_comparison_on_host.cpp |  42 +
 sycl/test/abi/sycl_symbols_linux.dump         |   1 +
 sycl/test/abi/sycl_symbols_windows.dump       |   1 +
 8 files changed, 988 insertions(+), 254 deletions(-)
 create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
 create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp

diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp
index df265e1585e83..f3e1937040dba 100644
--- a/sycl/include/sycl/device.hpp
+++ b/sycl/include/sycl/device.hpp
@@ -282,6 +282,17 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase<device> {
   /// the function.
   bool ext_oneapi_architecture_is(ext::oneapi::experimental::architecture arch);
 
+  /// Indicates if the SYCL device architecture is in the category passed
+  /// to the function.
+  ///
+  /// \param category is one of the architecture categories from arch_category
+  /// enum described in sycl_ext_oneapi_device_architecture specification.
+  ///
+  /// \return true if the SYCL device architecture is in the category passed to
+  /// the function.
+  bool
+  ext_oneapi_architecture_is(ext::oneapi::experimental::arch_category category);
+
   // TODO: Remove this diagnostics when __SYCL_WARN_IMAGE_ASPECT is removed.
 #if defined(__clang__)
 #pragma clang diagnostic pop
diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index cbd76058825d9..fd38003944418 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -8,14 +8,19 @@
 
 #pragma once
 
+#include <cstdint>  // for uint64_t
+#include <optional>
+
 namespace sycl {
 inline namespace _V1 {
 namespace ext::oneapi::experimental {
 
-enum class architecture {
+enum class architecture : uint64_t {
   // If new element is added to this enum:
   //
   // Update
+  //   - "detail::min_<category>_architecture" below if needed
+  //   - "detail::max_<category>_architecture" below if needed
   //   - sycl_ext_oneapi_device_architecture specification doc
   //   - "-fsycl-targets" description in sycl/doc/UsersManual.md
   //
@@ -24,92 +29,122 @@ enum class architecture {
   //   - the unique ID of the new architecture in SYCL RT source code to support
   //     querying the device architecture
   //
-  x86_64,
-  intel_cpu_spr,
-  intel_cpu_gnr,
-  intel_gpu_bdw,
-  intel_gpu_skl,
-  intel_gpu_kbl,
-  intel_gpu_cfl,
-  intel_gpu_apl,
+  // Important note about keeping architecture IDs below unique:
+  //   - the architecture ID must be a hex number with 16 digits
+  //   - the architecture ID must match the following template:
+  //     0x AA BBBB CCCCCCCC DD (without spaces), where
+  //       - AA is the 2-digit ID of the architecture family, which must be
+  //         unique
+  //       - BBBB is a 4-digit number reserved for future modifications
+  //         to keep uniqueness. It should always be 0000 for now
+  //       - CCCCCCCC is an 8-digit number identifying the architecture itself.
+  //         It must be unique for all architectures inside the family
+  //       - DD is a 2-digit number reserved for future unexpected
+  //         modifications to keep uniqueness. It should always be 00 for now
+  //
+  x86_64 = 0x9900000000000000,
+  //
+  // Intel CPU architectures
+  //
+  // AA is 03,
+  // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of
+  // the underlying backend
+  intel_cpu_spr = 0x0300000000000800,
+  intel_cpu_gnr = 0x0300000000000900,
+  //
+  // Intel GPU architectures
+  //
+  // AA is 00,
+  // CCCCCCCC is the GMD ID of that architecture
+  intel_gpu_bdw = 0x0000000200000000,
+  intel_gpu_skl = 0x0000000240000900,
+  intel_gpu_kbl = 0x0000000240400900,
+  intel_gpu_cfl = 0x0000000240800900,
+  intel_gpu_apl = 0x0000000240c00000,
   intel_gpu_bxt = intel_gpu_apl,
-  intel_gpu_glk,
-  intel_gpu_whl,
-  intel_gpu_aml,
-  intel_gpu_cml,
-  intel_gpu_icllp,
-  intel_gpu_ehl,
+  intel_gpu_glk = 0x0000000241000000,
+  intel_gpu_whl = 0x0000000241400000,
+  intel_gpu_aml = 0x0000000241800000,
+  intel_gpu_cml = 0x0000000241c00000,
+  intel_gpu_icllp = 0x00000002c0000000,
+  intel_gpu_ehl = 0x00000002c0800000,
   intel_gpu_jsl = intel_gpu_ehl,
-  intel_gpu_tgllp,
-  intel_gpu_rkl,
-  intel_gpu_adl_s,
+  intel_gpu_tgllp = 0x0000000300000000,
+  intel_gpu_rkl = 0x0000000300400000,
+  intel_gpu_adl_s = 0x0000000300800000,
   intel_gpu_rpl_s = intel_gpu_adl_s,
-  intel_gpu_adl_p,
-  intel_gpu_adl_n,
-  intel_gpu_dg1,
-  intel_gpu_acm_g10,
+  intel_gpu_adl_p = 0x0000000300c00000,
+  intel_gpu_adl_n = 0x0000000301000000,
+  intel_gpu_dg1 = 0x0000000302800000,
+  intel_gpu_acm_g10 = 0x000000030dc00800,
   intel_gpu_dg2_g10 = intel_gpu_acm_g10,
-  intel_gpu_acm_g11,
+  intel_gpu_acm_g11 = 0x000000030e000500,
   intel_gpu_dg2_g11 = intel_gpu_acm_g11,
-  intel_gpu_acm_g12,
+  intel_gpu_acm_g12 = 0x000000030e400000,
   intel_gpu_dg2_g12 = intel_gpu_acm_g12,
-  intel_gpu_pvc,
-  intel_gpu_pvc_vg,
+  intel_gpu_pvc = 0x000000030f000700,
+  intel_gpu_pvc_vg = 0x000000030f400700,
+  //
   // NVIDIA architectures
-  nvidia_gpu_sm_50,
-  nvidia_gpu_sm_52,
-  nvidia_gpu_sm_53,
-  nvidia_gpu_sm_60,
-  nvidia_gpu_sm_61,
-  nvidia_gpu_sm_62,
-  nvidia_gpu_sm_70,
-  nvidia_gpu_sm_72,
-  nvidia_gpu_sm_75,
-  nvidia_gpu_sm_80,
-  nvidia_gpu_sm_86,
-  nvidia_gpu_sm_87,
-  nvidia_gpu_sm_89,
-  nvidia_gpu_sm_90,
+  //
+  // AA is 01,
+  // CCCCCCCC is the SM version ID of that architecture
+  nvidia_gpu_sm_50 = 0x0100000000005000,
+  nvidia_gpu_sm_52 = 0x0100000000005200,
+  nvidia_gpu_sm_53 = 0x0100000000005300,
+  nvidia_gpu_sm_60 = 0x0100000000006000,
+  nvidia_gpu_sm_61 = 0x0100000000006100,
+  nvidia_gpu_sm_62 = 0x0100000000006200,
+  nvidia_gpu_sm_70 = 0x0100000000007000,
+  nvidia_gpu_sm_72 = 0x0100000000007200,
+  nvidia_gpu_sm_75 = 0x0100000000007500,
+  nvidia_gpu_sm_80 = 0x0100000000008000,
+  nvidia_gpu_sm_86 = 0x0100000000008600,
+  nvidia_gpu_sm_87 = 0x0100000000008700,
+  nvidia_gpu_sm_89 = 0x0100000000008900,
+  nvidia_gpu_sm_90 = 0x0100000000009000,
+  //
   // AMD architectures
-  amd_gpu_gfx700,
-  amd_gpu_gfx701,
-  amd_gpu_gfx702,
-  amd_gpu_gfx801,
-  amd_gpu_gfx802,
-  amd_gpu_gfx803,
-  amd_gpu_gfx805,
-  amd_gpu_gfx810,
-  amd_gpu_gfx900,
-  amd_gpu_gfx902,
-  amd_gpu_gfx904,
-  amd_gpu_gfx906,
-  amd_gpu_gfx908,
-  amd_gpu_gfx909,
-  amd_gpu_gfx90a,
-  amd_gpu_gfx90c,
-  amd_gpu_gfx940,
-  amd_gpu_gfx941,
-  amd_gpu_gfx942,
-  amd_gpu_gfx1010,
-  amd_gpu_gfx1011,
-  amd_gpu_gfx1012,
-  amd_gpu_gfx1013,
-  amd_gpu_gfx1030,
-  amd_gpu_gfx1031,
-  amd_gpu_gfx1032,
-  amd_gpu_gfx1033,
-  amd_gpu_gfx1034,
-  amd_gpu_gfx1035,
-  amd_gpu_gfx1036,
-  amd_gpu_gfx1100,
-  amd_gpu_gfx1101,
-  amd_gpu_gfx1102,
-  amd_gpu_gfx1103,
-  amd_gpu_gfx1150,
-  amd_gpu_gfx1151,
-  amd_gpu_gfx1200,
-  amd_gpu_gfx1201,
-  // Update "detail::max_architecture" below if you add new elements here!
+  //
+  // AA is 02,
+  // CCCCCCCC is the GFX version ID of that architecture
+  amd_gpu_gfx700 = 0x0200000000070000,
+  amd_gpu_gfx701 = 0x0200000000070100,
+  amd_gpu_gfx702 = 0x0200000000070200,
+  amd_gpu_gfx801 = 0x0200000000080100,
+  amd_gpu_gfx802 = 0x0200000000080200,
+  amd_gpu_gfx803 = 0x0200000000080300,
+  amd_gpu_gfx805 = 0x0200000000080500,
+  amd_gpu_gfx810 = 0x0200000000081000,
+  amd_gpu_gfx900 = 0x0200000000090000,
+  amd_gpu_gfx902 = 0x0200000000090200,
+  amd_gpu_gfx904 = 0x0200000000090400,
+  amd_gpu_gfx906 = 0x0200000000090600,
+  amd_gpu_gfx908 = 0x0200000000090800,
+  amd_gpu_gfx909 = 0x0200000000090900,
+  amd_gpu_gfx90a = 0x0200000000090a00,
+  amd_gpu_gfx90c = 0x0200000000090c00,
+  amd_gpu_gfx940 = 0x0200000000094000,
+  amd_gpu_gfx941 = 0x0200000000094100,
+  amd_gpu_gfx942 = 0x0200000000094200,
+  amd_gpu_gfx1010 = 0x0200000000101000,
+  amd_gpu_gfx1011 = 0x0200000000101100,
+  amd_gpu_gfx1012 = 0x0200000000101200,
+  amd_gpu_gfx1013 = 0x0200000000101300,
+  amd_gpu_gfx1030 = 0x0200000000103000,
+  amd_gpu_gfx1031 = 0x0200000000103100,
+  amd_gpu_gfx1032 = 0x0200000000103200,
+  amd_gpu_gfx1033 = 0x0200000000103300,
+  amd_gpu_gfx1034 = 0x0200000000103400,
+  amd_gpu_gfx1035 = 0x0200000000103500,
+  amd_gpu_gfx1036 = 0x0200000000103600,
+  amd_gpu_gfx1100 = 0x0200000000110000,
+  amd_gpu_gfx1101 = 0x0200000000110100,
+  amd_gpu_gfx1102 = 0x0200000000110200,
+  amd_gpu_gfx1103 = 0x0200000000110300,
+  amd_gpu_gfx1150 = 0x0200000000115000,
+  amd_gpu_gfx1151 = 0x0200000000115100,
+  amd_gpu_gfx1200 = 0x0200000000120000,
+  amd_gpu_gfx1201 = 0x0200000000120100,
   intel_gpu_8_0_0 = intel_gpu_bdw,
   intel_gpu_9_0_9 = intel_gpu_skl,
   intel_gpu_9_1_9 = intel_gpu_kbl,
@@ -124,12 +159,49 @@ enum class architecture {
   intel_gpu_12_10_0 = intel_gpu_dg1,
 };
 
+enum class arch_category {
+  // If a new element is added to this enum:
+  //
+  // Add
+  //   - "detail::min_<new_category>_architecture" variable below
+  //   - "detail::max_<new_category>_architecture" variable below
+  //
+  // Update
+  //   - "detail::get_category_min_architecture()" function below
+  //   - "detail::get_category_max_architecture()" function below
+  //   - "detail::get_device_architecture_category()" function below
+  //   - sycl_ext_oneapi_device_architecture specification doc
+  //
+  intel_gpu = 0,
+  nvidia_gpu = 1,
+  amd_gpu = 2,
+  // TODO: add intel_cpu = 3,
+};
+
 } // namespace ext::oneapi::experimental
 
 namespace detail {
 
-static constexpr ext::oneapi::experimental::architecture max_architecture =
-    ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
+static constexpr ext::oneapi::experimental::architecture
+    min_intel_gpu_architecture =
+        ext::oneapi::experimental::architecture::intel_gpu_bdw;
+static constexpr ext::oneapi::experimental::architecture
+    max_intel_gpu_architecture =
+        ext::oneapi::experimental::architecture::intel_gpu_pvc_vg;
+
+static constexpr ext::oneapi::experimental::architecture
+    min_nvidia_gpu_architecture =
+        ext::oneapi::experimental::architecture::nvidia_gpu_sm_50;
+static constexpr ext::oneapi::experimental::architecture
+    max_nvidia_gpu_architecture =
+        ext::oneapi::experimental::architecture::nvidia_gpu_sm_90;
+
+static constexpr ext::oneapi::experimental::architecture
+    min_amd_gpu_architecture =
+        ext::oneapi::experimental::architecture::amd_gpu_gfx700;
+static constexpr ext::oneapi::experimental::architecture
+    max_amd_gpu_architecture =
+        ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
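
For orientation, here is a minimal sketch (not part of the patch; the helper names are hypothetical) of how the "0x AA BBBB CCCCCCCC DD" layout documented in the enum above can be unpacked from an architecture value:

// Illustration only: decode the architecture-ID fields described above.
#include <cstdint>

constexpr uint64_t arch_family(uint64_t id) { return (id >> 56) & 0xFF; }        // AA
constexpr uint64_t arch_reserved_hi(uint64_t id) { return (id >> 40) & 0xFFFF; } // BBBB
constexpr uint64_t arch_number(uint64_t id) { return (id >> 8) & 0xFFFFFFFF; }   // CCCCCCCC
constexpr uint64_t arch_reserved_lo(uint64_t id) { return id & 0xFF; }           // DD

// nvidia_gpu_sm_90 == 0x0100000000009000: family 0x01 (NVIDIA), arch 0x90.
static_assert(arch_family(0x0100000000009000) == 0x01);
static_assert(arch_reserved_hi(0x0100000000009000) == 0x0000);
static_assert(arch_number(0x0100000000009000) == 0x90);
static_assert(arch_reserved_lo(0x0100000000009000) == 0x00);

Because every ID in a family shares the same AA byte and the IDs within a family grow monotonically, a whole category can be tested with the min/max constants above via a simple range check, which is exactly what the helpers added below rely on.
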
 
 #ifndef __SYCL_TARGET_INTEL_X86_64__
 #define __SYCL_TARGET_INTEL_X86_64__ 0
@@ -439,167 +511,247 @@ static constexpr bool is_allowable_aot_mode =
     (__SYCL_TARGET_AMD_GPU_GFX1200__ == 1) ||
     (__SYCL_TARGET_AMD_GPU_GFX1201__ == 1);
 
-struct IsAOTForArchitectureClass {
-  // Allocate an array of size == size of
-  // ext::oneapi::experimental::architecture enum.
-  bool arr[static_cast<int>(max_architecture) + 1];
-
-  using arch = ext::oneapi::experimental::architecture;
-
-  constexpr IsAOTForArchitectureClass() : arr() {
-    arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_bdw)] =
-        __SYCL_TARGET_INTEL_GPU_BDW__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_skl)] =
-        __SYCL_TARGET_INTEL_GPU_SKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_kbl)] =
-        __SYCL_TARGET_INTEL_GPU_KBL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cfl)] =
-        __SYCL_TARGET_INTEL_GPU_CFL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_apl)] =
-        __SYCL_TARGET_INTEL_GPU_APL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_glk)] =
-        __SYCL_TARGET_INTEL_GPU_GLK__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_whl)] =
-        __SYCL_TARGET_INTEL_GPU_WHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_aml)] =
-        __SYCL_TARGET_INTEL_GPU_AML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cml)] =
-        __SYCL_TARGET_INTEL_GPU_CML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_icllp)] =
-        __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_ehl)] =
-        __SYCL_TARGET_INTEL_GPU_EHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_tgllp)] =
-        __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_rkl)] =
-        __SYCL_TARGET_INTEL_GPU_RKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_s)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_p)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_n)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_dg1)] =
-        __SYCL_TARGET_INTEL_GPU_DG1__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g10)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g11)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g12)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_pvc)] =
-        __SYCL_TARGET_INTEL_GPU_PVC__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_pvc_vg)] =
-        __SYCL_TARGET_INTEL_GPU_PVC_VG__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_50)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM50__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_52)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM52__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_53)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM53__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_60)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM60__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_61)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM61__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_62)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM62__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_70)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM70__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_72)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM72__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_75)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM75__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_80)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM80__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_86)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM86__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_87)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM87__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_89)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM89__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_90)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM90__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx700)] =
-        __SYCL_TARGET_AMD_GPU_GFX700__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx701)] =
-        __SYCL_TARGET_AMD_GPU_GFX701__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx702)] =
-        __SYCL_TARGET_AMD_GPU_GFX702__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx801)] =
-        __SYCL_TARGET_AMD_GPU_GFX801__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx802)] =
-        __SYCL_TARGET_AMD_GPU_GFX802__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx803)] =
-        __SYCL_TARGET_AMD_GPU_GFX803__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx805)] =
-        __SYCL_TARGET_AMD_GPU_GFX805__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx810)] =
-        __SYCL_TARGET_AMD_GPU_GFX810__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx900)] =
-        __SYCL_TARGET_AMD_GPU_GFX900__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx902)] =
-        __SYCL_TARGET_AMD_GPU_GFX902__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx904)] =
-        __SYCL_TARGET_AMD_GPU_GFX904__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx906)] =
-        __SYCL_TARGET_AMD_GPU_GFX906__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx908)] =
-        __SYCL_TARGET_AMD_GPU_GFX908__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx909)] =
-        __SYCL_TARGET_AMD_GPU_GFX909__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx90a)] =
-        __SYCL_TARGET_AMD_GPU_GFX90A__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx90c)] =
-        __SYCL_TARGET_AMD_GPU_GFX90C__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx940)] =
-        __SYCL_TARGET_AMD_GPU_GFX940__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx941)] =
-        __SYCL_TARGET_AMD_GPU_GFX941__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx942)] =
-        __SYCL_TARGET_AMD_GPU_GFX942__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1010)] =
-        __SYCL_TARGET_AMD_GPU_GFX1010__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1011)] =
-        __SYCL_TARGET_AMD_GPU_GFX1011__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1012)] =
-        __SYCL_TARGET_AMD_GPU_GFX1012__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1030)] =
-        __SYCL_TARGET_AMD_GPU_GFX1030__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1031)] =
-        __SYCL_TARGET_AMD_GPU_GFX1031__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1032)] =
-        __SYCL_TARGET_AMD_GPU_GFX1032__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1033)] =
-        __SYCL_TARGET_AMD_GPU_GFX1033__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1034)] =
-        __SYCL_TARGET_AMD_GPU_GFX1034__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1035)] =
-        __SYCL_TARGET_AMD_GPU_GFX1035__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1036)] =
-        __SYCL_TARGET_AMD_GPU_GFX1036__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1100)] =
-        __SYCL_TARGET_AMD_GPU_GFX1100__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1101)] =
-        __SYCL_TARGET_AMD_GPU_GFX1101__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1102)] =
-        __SYCL_TARGET_AMD_GPU_GFX1102__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1103)] =
-        __SYCL_TARGET_AMD_GPU_GFX1103__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1150)] =
-        __SYCL_TARGET_AMD_GPU_GFX1150__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1151)] =
-        __SYCL_TARGET_AMD_GPU_GFX1151__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1200)] =
-        __SYCL_TARGET_AMD_GPU_GFX1200__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1201)] =
-        __SYCL_TARGET_AMD_GPU_GFX1201__ == 1;
-  }
-};
+constexpr static std::optional<ext::oneapi::experimental::architecture>
+get_current_architecture_aot() {
+  // TODO: rewrite the logic below once sycl_ext_oneapi_device_architecture
+  // supports more than one target in -fsycl-targets
+#if __SYCL_TARGET_INTEL_X86_64__
+  return ext::oneapi::experimental::architecture::x86_64;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_BDW__
+  return ext::oneapi::experimental::architecture::intel_gpu_bdw;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_SKL__
+  return ext::oneapi::experimental::architecture::intel_gpu_skl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_KBL__
+  return ext::oneapi::experimental::architecture::intel_gpu_kbl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_CFL__
+  return ext::oneapi::experimental::architecture::intel_gpu_cfl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_APL__
+  return ext::oneapi::experimental::architecture::intel_gpu_apl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_GLK__
+  return ext::oneapi::experimental::architecture::intel_gpu_glk;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_WHL__
+  return ext::oneapi::experimental::architecture::intel_gpu_whl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_AML__
+  return ext::oneapi::experimental::architecture::intel_gpu_aml;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_CML__
+  return ext::oneapi::experimental::architecture::intel_gpu_cml;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ICLLP__
+  return ext::oneapi::experimental::architecture::intel_gpu_icllp;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_EHL__
+  return ext::oneapi::experimental::architecture::intel_gpu_ehl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_TGLLP__
+  return ext::oneapi::experimental::architecture::intel_gpu_tgllp;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_RKL__
+  return ext::oneapi::experimental::architecture::intel_gpu_rkl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_S__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_s;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_P__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_p;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_N__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_n;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_DG1__
+  return ext::oneapi::experimental::architecture::intel_gpu_dg1;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G10__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g10;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G11__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g11;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G12__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g12;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_PVC__
+  return ext::oneapi::experimental::architecture::intel_gpu_pvc;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_PVC_VG__
+  return ext::oneapi::experimental::architecture::intel_gpu_pvc_vg;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM50__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_50;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM52__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_52;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM53__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_53;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM60__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_60;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM61__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_61;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM62__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_62;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM70__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_70;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM72__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_72;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM75__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_75;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM80__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_80;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM86__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_86;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM87__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_87;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM89__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_89;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM90__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_90;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX700__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx700;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX701__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx701;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX702__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx702;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX801__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx801;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX802__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx802;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX803__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx803;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX805__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx805;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX810__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx810;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX900__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx900;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX902__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx902;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX904__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx904;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX906__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx906;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX908__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx908;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX909__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx909;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX90A__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx90a;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX90C__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx90c;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX940__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx940;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX941__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx941;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX942__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx942;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1010__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1010;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1011__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1011;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1012__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1012;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1030__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1030;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1031__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1031;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1032__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1032;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1033__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1033;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1034__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1034;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1035__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1035;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1036__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1036;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1100__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1100;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1101__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1101;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1102__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1102;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1103__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1103;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1150__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1150;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1151__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1151;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1200__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1200;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1201__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
+#endif
+  return std::nullopt;
+}
 
-// One entry for each enumerator in "architecture" telling whether the AOT
-// target matches that architecture.
-static constexpr IsAOTForArchitectureClass is_aot_for_architecture;
+// Tells whether the AOT target matches the given architecture.
+constexpr static bool
+is_aot_for_architecture(ext::oneapi::experimental::architecture arch) {
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+  if (current_arch.has_value())
+    return arch == *current_arch;
+  return false;
+}
 
 // Reads the value of "is_allowable_aot_mode" via a template to defer triggering
 // static_assert() until template instantiation time.
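
A reduced sketch of the detection pattern above (illustration only, not part of the patch; the two targets shown are arbitrary). The compiler driver predefines exactly one __SYCL_TARGET_* macro as 1 per AOT compilation, so at most one #if branch survives preprocessing and the function folds to a single return statement; under JIT compilation none is set and std::nullopt is returned:

// Illustration only: the single-target AOT detection pattern, reduced to two
// hypothetical targets. Compiling with -fsycl-targets=intel_gpu_pvc would
// predefine __SYCL_TARGET_INTEL_GPU_PVC__=1, collapsing the whole function to
// "return architecture::intel_gpu_pvc;" at compile time.
#include <optional>

enum class architecture { intel_gpu_pvc, nvidia_gpu_sm_90 };

#ifndef __SYCL_TARGET_INTEL_GPU_PVC__
#define __SYCL_TARGET_INTEL_GPU_PVC__ 0
#endif
#ifndef __SYCL_TARGET_NVIDIA_GPU_SM90__
#define __SYCL_TARGET_NVIDIA_GPU_SM90__ 0
#endif

constexpr std::optional<architecture> current_architecture_aot() {
#if __SYCL_TARGET_INTEL_GPU_PVC__
  return architecture::intel_gpu_pvc;
#endif
#if __SYCL_TARGET_NVIDIA_GPU_SM90__
  return architecture::nvidia_gpu_sm_90;
#endif
  return std::nullopt; // JIT compilation: architecture unknown at compile time
}
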
@@ -612,28 +764,241 @@ constexpr static bool allowable_aot_mode() {
 // pack.
 template <ext::oneapi::experimental::architecture... Archs>
 constexpr static bool device_architecture_is() {
-  return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...);
+  return (is_aot_for_architecture(Archs) || ...);
+}
+
+static constexpr std::optional<ext::oneapi::experimental::architecture>
+get_category_min_architecture(
+    ext::oneapi::experimental::arch_category Category) {
+  if (Category == ext::oneapi::experimental::arch_category::intel_gpu) {
+    return min_intel_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) {
+    return min_nvidia_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) {
+    return min_amd_gpu_architecture;
+  } // add "else if" when adding a new category; "else" is not needed
+  return std::nullopt;
+}
+
+static constexpr std::optional<ext::oneapi::experimental::architecture>
+get_category_max_architecture(
+    ext::oneapi::experimental::arch_category Category) {
+  if (Category == ext::oneapi::experimental::arch_category::intel_gpu) {
+    return max_intel_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) {
+    return max_nvidia_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) {
+    return max_amd_gpu_architecture;
+  } // add "else if" when adding a new category; "else" is not needed
+  return std::nullopt;
+}
+
+template <ext::oneapi::experimental::arch_category Category>
+constexpr static bool device_architecture_is_in_category_aot() {
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      category_min_arch = get_category_min_architecture(Category);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      category_max_arch = get_category_max_architecture(Category);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+
+  if (category_min_arch.has_value() && category_max_arch.has_value() &&
+      current_arch.has_value())
+    if ((*category_min_arch <= *current_arch) &&
+        (*current_arch <= *category_max_arch))
+      return true;
+
+  return false;
+}
+
+template <ext::oneapi::experimental::arch_category... Categories>
+constexpr static bool device_architecture_is_in_categories() {
+  return (device_architecture_is_in_category_aot<Categories>() || ...);
+}
+
+constexpr static std::optional<ext::oneapi::experimental::arch_category>
+get_device_architecture_category(ext::oneapi::experimental::architecture arch) {
+  auto arch_is_in_segment =
+      [&arch](ext::oneapi::experimental::architecture min,
+              ext::oneapi::experimental::architecture max) {
+        if ((min <= arch) && (arch <= max))
+          return true;
+        return false;
+      };
+
+  if (arch_is_in_segment(min_intel_gpu_architecture,
+                         max_intel_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::intel_gpu;
+  if (arch_is_in_segment(min_nvidia_gpu_architecture,
+                         max_nvidia_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::nvidia_gpu;
+  if (arch_is_in_segment(min_amd_gpu_architecture, max_amd_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::amd_gpu;
+  // add "if" when adding a new category
+
+  return std::nullopt;
+}
+
+template <ext::oneapi::experimental::architecture Arch, typename Compare>
+constexpr static bool device_architecture_comparison_aot(Compare comp) {
+  constexpr std::optional<ext::oneapi::experimental::arch_category>
+      input_arch_category = get_device_architecture_category(Arch);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+
+  if (input_arch_category.has_value() && current_arch.has_value()) {
+    std::optional<ext::oneapi::experimental::arch_category>
+        current_arch_category = get_device_architecture_category(*current_arch);
+    if (current_arch_category.has_value() &&
+        (*input_arch_category == *current_arch_category))
+      return comp(*current_arch, Arch);
+  }
+  return false;
 }
 
-// Helper object used to implement "else_if_architecture_is" and "otherwise".
-// The "MakeCall" template parameter tells whether a previous clause in the
-// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some
-// previous clause was true, so none of the subsequent
-// "else_if_architecture_is" or "otherwise" member functions should call the
-// user's function.
+constexpr auto device_arch_compare_op_lt =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a < b; };
+constexpr auto device_arch_compare_op_le =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a <= b; };
+constexpr auto device_arch_compare_op_gt =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a > b; };
+constexpr auto device_arch_compare_op_ge =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a >= b; };
+
+// Helper object used to implement "else_if_architecture_is",
+// "else_if_architecture_is_*" and "otherwise". The "MakeCall" template
+// parameter tells whether a previous clause in the "if-elseif-elseif ..." chain
+// was true. When "MakeCall" is false, some previous clause was true, so none
+// of the subsequent "else_if_architecture_is", "else_if_architecture_is_*" or
+// "otherwise" member functions should call the user's function.
 template <bool MakeCall> class if_architecture_helper {
 public:
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is` function has any one of the architectures
+  /// listed in the @tparam Archs parameter pack.
   template <ext::oneapi::experimental::architecture... Archs, typename T>
-  constexpr auto else_if_architecture_is(T fnTrue) {
+  constexpr auto else_if_architecture_is(T fn) {
     if constexpr (MakeCall && device_architecture_is<Archs...>()) {
-      fnTrue();
+      fn();
+      return if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is` function has an architecture that is in any
+  /// one of the categories listed in the @tparam Categories pack.
+  template <ext::oneapi::experimental::arch_category... Categories, typename T>
+  constexpr auto else_if_architecture_is(T fn) {
+    if constexpr (MakeCall &&
+                  device_architecture_is_in_categories<Categories...>()) {
+      fn();
       return if_architecture_helper<false>{};
     } else {
-      (void)fnTrue;
+      (void)fn;
       return if_architecture_helper<MakeCall>{};
     }
   }
 
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_lt` function has an architecture that is in
+  /// the same family as @tparam Arch and compares less than @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_lt(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_lt)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_le` function has an architecture that is in
+  /// the same family as @tparam Arch and compares less than or equal to
+  /// @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_le(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_le)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_gt` function has an architecture that is in
+  /// the same family as @tparam Arch and compares greater than @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_gt(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_gt)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_ge` function has an architecture that is in
+  /// the same family as @tparam Arch and compares greater than or equal to
+  /// @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_ge(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_ge)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_between` function has an architecture that is
+  /// in the same family as @tparam Arch1 and is greater than or equal to
+  /// @tparam Arch1 and is less than or equal to @tparam Arch2.
+  template <ext::oneapi::experimental::architecture Arch1,
+            ext::oneapi::experimental::architecture Arch2, typename T>
+  constexpr auto else_if_architecture_is_between(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch1>(
+                      device_arch_compare_op_ge) &&
+                  sycl::detail::device_architecture_comparison_aot<Arch2>(
+                      device_arch_compare_op_le)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
   template <typename T> constexpr void otherwise(T fn) {
     if constexpr (MakeCall) {
       fn();
@@ -644,17 +1009,113 @@ template <bool MakeCall> class if_architecture_helper {
 
 namespace ext::oneapi::experimental {
 
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is` function has any one of the architectures listed in the
+/// @tparam Archs pack.
 template <architecture... Archs, typename T>
-constexpr static auto if_architecture_is(T fnTrue) {
+constexpr static auto if_architecture_is(T fn) {
   static_assert(sycl::detail::allowable_aot_mode<Archs...>(),
                 "The if_architecture_is function may only be used when AOT "
                 "compiling with '-fsycl-targets=spir64_x86_64' or "
                 "'-fsycl-targets=*_gpu_*'");
   if constexpr (sycl::detail::device_architecture_is<Archs...>()) {
-    fnTrue();
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is` function has an architecture that is in any one of the
+/// categories listed in the @tparam Categories pack.
+template <arch_category... Categories, typename T>
+constexpr static auto if_architecture_is(T fn) {
+  if constexpr (sycl::detail::device_architecture_is_in_categories<
+                    Categories...>()) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_lt` function has an architecture that is in the same
+/// family as @tparam Arch and compares less than @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_lt(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_lt)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_le` function has an architecture that is in the same
+/// family as @tparam Arch and compares less than or equal to @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_le(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_le)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_gt` function has an architecture that is in the same
+/// family as @tparam Arch and compares greater than @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_gt(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_gt)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_ge` function has an architecture that is in the same
+/// family as @tparam Arch and compares greater than or equal to @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_ge(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_ge)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
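
A usage sketch for the chained device-code API defined above (illustration only, not part of the patch; the kernel and the architectures chosen here are arbitrary). Note that the architecture-based overloads require AOT compilation with a matching -fsycl-targets value:

// Illustration only: selecting a code path inside a kernel with the chained
// API defined above.
#include <sycl/sycl.hpp>
namespace syclex = sycl::ext::oneapi::experimental;

void dispatch(sycl::queue &q, int *data) {
  q.single_task([=] {
    syclex::if_architecture_is<syclex::arch_category::intel_gpu>([&] {
      data[0] = 1; // any Intel GPU
    }).else_if_architecture_is_ge<syclex::architecture::nvidia_gpu_sm_80>([&] {
      data[0] = 2; // NVIDIA sm_80 or newer
    }).otherwise([&] {
      data[0] = 3; // fallback path
    });
  });
}

Each clause runs its callable only when no earlier clause matched, which is exactly what the MakeCall template parameter of if_architecture_helper tracks through the chain.
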
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_between` function has an architecture that is in the
+/// same family as @tparam Arch1 and is greater than or equal to @tparam
+/// Arch1 and is less than or equal to @tparam Arch2.
+template <architecture Arch1, architecture Arch2, typename T>
+constexpr static auto if_architecture_is_between(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch1>(
+                    sycl::detail::device_arch_compare_op_ge) &&
+                sycl::detail::device_architecture_comparison_aot<Arch2>(
+                    sycl::detail::device_arch_compare_op_le)) {
+    fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
-    (void)fnTrue;
+    (void)fn;
     return sycl::detail::if_architecture_helper<true>{};
   }
 }
diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp
index f6b4a27b61232..24ff4d20ef03a 100644
--- a/sycl/source/detail/device_impl.hpp
+++ b/sycl/source/detail/device_impl.hpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
 #include
 #include
@@ -238,6 +239,18 @@ class device_impl {
     return Arch == getDeviceArch();
   }
 
+  bool extOneapiArchitectureIs(
+      ext::oneapi::experimental::arch_category Category) const {
+    std::optional<ext::oneapi::experimental::architecture> CategoryMinArch =
+        get_category_min_architecture(Category);
+    std::optional<ext::oneapi::experimental::architecture> CategoryMaxArch =
+        get_category_max_architecture(Category);
+    if (CategoryMinArch.has_value() && CategoryMaxArch.has_value())
+      return CategoryMinArch <= getDeviceArch() &&
+             getDeviceArch() <= CategoryMaxArch;
+    return false;
+  }
+
   /// Gets the current device timestamp
   /// @throw sycl::feature_not_supported if feature is not supported on device
   uint64_t getCurrentDeviceTime();
diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp
index 2bf76a3fa9407..d0753d663da25 100644
--- a/sycl/source/device.cpp
+++ b/sycl/source/device.cpp
@@ -285,10 +285,16 @@ bool device::ext_oneapi_can_access_peer(const device &peer,
   return value == 1;
 }
 
+
 bool device::ext_oneapi_architecture_is(
     ext::oneapi::experimental::architecture arch) {
   return impl->extOneapiArchitectureIs(arch);
 }
 
+bool device::ext_oneapi_architecture_is(
+    ext::oneapi::experimental::arch_category category) {
+  return impl->extOneapiArchitectureIs(category);
+}
+
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
new file mode 100644
index 0000000000000..cb1214395f8e4
--- /dev/null
+++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
@@ -0,0 +1,199 @@
+// REQUIRES: gpu, gpu-intel-pvc
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc %s -o %t.out
+// RUN: %{run} %t.out
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::oneapi::experimental;
+
+int main() {
+  int *result;
+  int N = 29;
+
+  {
+    queue q(gpu_selector_v);
+
+    result = (int *)malloc_shared(N * sizeof(int), q);
+
+    q.submit([&](handler &cgh) { cgh.fill(result, 0, N); }).wait();
+
+    q.submit([&](handler &cgh) {
+      cgh.single_task([=]() {
+        // test if_architecture_is with category
+        if_architecture_is([&]() {
+          result[0] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is with category
+        if_architecture_is([&]() {}).otherwise(
+            [&]() { result[1] = 1; });
+
+        // test else_if_architecture_is with category - 1
+        if_architecture_is([&]() {})
+            .else_if_architecture_is(
+                [&]() { result[2] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is with category - 2
+        if_architecture_is([&]() {})
+            .else_if_architecture_is([&]() {})
+            .else_if_architecture_is(
+                [&]() { result[3] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_lt
+        if_architecture_is_lt([&]() {
+          result[4] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_lt - 1
+        if_architecture_is_lt([&]() {}).otherwise(
+            [&]() { result[5] = 1; });
+
+        // negative test if_architecture_is_lt - 2
+        if_architecture_is_lt([&]() {
+        }).otherwise([&]() { result[6] = 1; });
+
+        // test else_if_architecture_is_lt - 1
+        if_architecture_is_lt([&]() {})
+            .else_if_architecture_is_lt(
+                [&]() { result[7] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_lt - 2
+        if_architecture_is_lt([&]() {
+        }).else_if_architecture_is_lt([&]() {})
+            .else_if_architecture_is_lt(
+                [&]() { result[8] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_le
+        if_architecture_is_le([&]() {
+          result[9] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_le - 1
+        if_architecture_is_le([&]() {}).otherwise(
+            [&]() { result[10] = 1; });
+
+        // negative test if_architecture_is_le - 2
+        if_architecture_is_le([&]() {
+        }).otherwise([&]() { result[11] = 1; });
+
+        // test else_if_architecture_is_le - 1
+        if_architecture_is_le([&]() {})
+            .else_if_architecture_is_le(
+                [&]() { result[12] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_le - 2
+        if_architecture_is_le([&]() {
+        }).else_if_architecture_is_le([&]() {})
+            .else_if_architecture_is_le(
+                [&]() { result[13] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_gt
+        if_architecture_is_gt([&]() {
+          result[14] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_gt - 1
+        if_architecture_is_gt([&]() {
+        }).otherwise([&]() { result[15] = 1; });
+
+        // negative test if_architecture_is_gt - 2
+        if_architecture_is_gt([&]() {
+        }).otherwise([&]() { result[16] = 1; });
+
+        // test else_if_architecture_is_gt - 1
+        if_architecture_is_gt([&]() {})
+            .else_if_architecture_is_gt(
+                [&]() { result[17] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_gt - 2
+        if_architecture_is_gt([&]() {
+        }).else_if_architecture_is_gt([&]() {})
+            .else_if_architecture_is_gt(
+                [&]() { result[18] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_ge
+        if_architecture_is_ge([&]() {
+          result[19] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_ge - 1
+        if_architecture_is_ge([&]() {
+        }).otherwise([&]() { result[20] = 1; });
+
+        // negative test if_architecture_is_ge - 2
+        if_architecture_is_ge([&]() {
+        }).otherwise([&]() { result[21] = 1; });
+
+        // test else_if_architecture_is_ge - 1
+        if_architecture_is_ge([&]() {})
+            .else_if_architecture_is_ge(
+                [&]() { result[22] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_ge - 2
+        if_architecture_is_ge([&]() {
+        }).else_if_architecture_is_ge([&]() {})
+            .else_if_architecture_is_ge(
+                [&]() { result[23] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_between
+        if_architecture_is_between([&]() {
+          result[24] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_between - 1
+        if_architecture_is_between([&]() {
+        }).otherwise([&]() { result[25] = 1; });
+
+        // negative test if_architecture_is_between - 2
+        if_architecture_is_between([&]() {
+        }).otherwise([&]() { result[26] = 1; });
+
+        // test else_if_architecture_is_between - 1
+        if_architecture_is_between([&]() {})
+            .else_if_architecture_is_between(
+                [&]() { result[27] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_between - 2
+        if_architecture_is_between([&]() {})
+            .else_if_architecture_is_between(
+                [&]() {})
+            .else_if_architecture_is_between(
+                [&]() { result[28] = 1; })
+            .otherwise([&]() {});
+
+        // if adding a new test here, don't forget to increment the result
+        // index and the value of the N variable
+      });
+    });
+  }
+
+  bool failed = false;
+  for (int i = 0; i < N; i++)
+    if (result[i] != 1) {
+      std::cout << "Verification of the test " << i << " failed." << std::endl;
+      failed = true;
+    }
+
+  return failed;
+}
diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp
new file mode 100644
index 0000000000000..eafbc0f2aadad
--- /dev/null
+++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp
@@ -0,0 +1,42 @@
+// REQUIRES: gpu
+
+// This test is written only for Intel architectures. It is expected to fail on
+// NVIDIA and AMD, as the checks for the ext_oneapi_architecture_is host API
+// expect the device architecture to be an Intel GPU.
+// UNSUPPORTED: cuda, hip
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <cassert>
+#include <sycl/sycl.hpp>
+
+namespace syclex = sycl::ext::oneapi::experimental;
+
+int main() {
+  sycl::queue q;
+  sycl::device dev = q.get_device();
+
+  assert(dev.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu));
+  assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::nvidia_gpu));
+  assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::amd_gpu));
+
+  syclex::architecture intel_gpu_arch = syclex::architecture::intel_gpu_ehl;
+  assert(intel_gpu_arch < syclex::architecture::intel_gpu_pvc);
+  assert(intel_gpu_arch <= syclex::architecture::intel_gpu_pvc);
+  assert(intel_gpu_arch > syclex::architecture::intel_gpu_skl);
+  assert(intel_gpu_arch >= syclex::architecture::intel_gpu_ehl);
+
+  syclex::architecture nvidia_gpu_arch = syclex::architecture::nvidia_gpu_sm_70;
+  assert(nvidia_gpu_arch < syclex::architecture::nvidia_gpu_sm_80);
+  assert(nvidia_gpu_arch <= syclex::architecture::nvidia_gpu_sm_80);
+  assert(nvidia_gpu_arch > syclex::architecture::nvidia_gpu_sm_53);
+  assert(nvidia_gpu_arch >= syclex::architecture::nvidia_gpu_sm_70);
+
+  syclex::architecture amd_gpu_arch = syclex::architecture::amd_gpu_gfx908;
+  assert(amd_gpu_arch < syclex::architecture::amd_gpu_gfx1031);
+  assert(amd_gpu_arch <= syclex::architecture::amd_gpu_gfx1031);
+  assert(amd_gpu_arch > syclex::architecture::amd_gpu_gfx810);
+  assert(amd_gpu_arch >= syclex::architecture::amd_gpu_gfx908);
+
+  return 0;
+}
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index 4fe3042f807ab..75aca1a6dda64 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -4058,6 +4058,7 @@ _ZN4sycl3_V16detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_imp
 _ZN4sycl3_V16detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE
 _ZN4sycl3_V16device11get_devicesENS0_4info11device_typeE
 _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental12architectureE
+_ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental13arch_categoryE
 _ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE
 _ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_
 _ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_
diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump
index 435176097e7bd..fa0cc450ce4ce 100644
--- a/sycl/test/abi/sycl_symbols_windows.dump
+++ b/sycl/test/abi/sycl_symbols_windows.dump
@@ -1017,6 +1017,7 @@
 ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z
 ?ext_oneapi_advise_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEBX_KW4_pi_mem_advice@@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z
 ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z
+?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z
 ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z
 ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXXZ
 ?ext_oneapi_can_access_peer@device@_V1@sycl@@QEAA_NAEBV123@W4peer_access@oneapi@ext@23@@Z
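
Finally, a host-side sketch of the two ext_oneapi_architecture_is overloads whose exported symbols are added to the ABI dumps above (illustration only, not part of the patch):

// Illustration only: querying architecture and category on the host.
#include <iostream>
#include <sycl/sycl.hpp>
namespace syclex = sycl::ext::oneapi::experimental;

int main() {
  sycl::device d{sycl::default_selector_v};
  // Category query, added by this patch.
  if (d.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu))
    std::cout << "device is some Intel GPU\n";
  // Exact-architecture query, present before this patch.
  if (d.ext_oneapi_architecture_is(syclex::architecture::intel_gpu_pvc))
    std::cout << "device is PVC specifically\n";
  return 0;
}
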