From 2c4f5f7c5c1f3d7fcb3bf4b428d85fa3afa8ee18 Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Sat, 17 Feb 2024 10:17:07 -0800 Subject: [PATCH 01/11] [SYCL] Implement device arch comparison according to sycl_ext_oneapi_device_architecture spec update This patch implements device AOT and host APIs for device architecture comparison in accordance to [spec update](https://github.com/intel/llvm/pull/12259) --- sycl/include/sycl/device.hpp | 11 + .../experimental/device_architecture.hpp | 957 +++++++++++++----- sycl/source/detail/device_impl.hpp | 13 + sycl/source/device.cpp | 6 + ..._architecture_comparison_on_device_aot.cpp | 199 ++++ ...device_architecture_comparison_on_host.cpp | 42 + 6 files changed, 975 insertions(+), 253 deletions(-) create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 03b5a8f10ca33..2b65e077bd08e 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -267,6 +267,17 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { /// the function. bool ext_oneapi_architecture_is(ext::oneapi::experimental::architecture arch); + /// Indicates if the SYCL device architecture equals is in the category passed + /// to the function. + /// + /// \param category is one of the architecture categories from arch_category + /// enum described in sycl_ext_oneapi_device_architecture specification. + /// + /// \return true if the SYCL device architecture is in the category passed to + /// the function. + bool + ext_oneapi_architecture_is(ext::oneapi::experimental::arch_category category); + // TODO: Remove this diagnostics when __SYCL_WARN_IMAGE_ASPECT is removed. #if defined(__clang__) #pragma clang diagnostic pop diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index cbd76058825d9..dfc2de8665960 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -8,6 +8,9 @@ #pragma once +#include +#include + namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { @@ -16,6 +19,8 @@ enum class architecture { // If new element is added to this enum: // // Update + // - "detail::min__architecture" below if needed + // - "detail::max__architecture" below if needed // - sycl_ext_oneapi_device_architecture specification doc // - "-fsycl-targets" description in sycl/doc/UsersManual.md // @@ -24,92 +29,116 @@ enum class architecture { // - the unique ID of the new architecture in SYCL RT source code to support // querying the device architecture // - x86_64, - intel_cpu_spr, - intel_cpu_gnr, - intel_gpu_bdw, - intel_gpu_skl, - intel_gpu_kbl, - intel_gpu_cfl, - intel_gpu_apl, + x86_64 = 0x00000000, + // + // Intel CPU architectures + // + // The requirement for the unique ID for intel_cpu_* architectures below is: + // - the ID must start with 0x0 (to avoid the integer overflow) + // - then goes Intel's vendor ID from underlied backend (which is 8086) + // - the ID ends with the architecture ID from the DEVICE_IP_VERSION extension + // of underlied backend + intel_cpu_spr = 0x08086008, + intel_cpu_gnr = 0x08086009, + // + // Intel GPU architectures + // + // The requirement for the unique ID for intel_gpu_* architectures below is: + // - the ID is GMDID of that architecture + intel_gpu_bdw = 0x02000000, + intel_gpu_skl = 0x02400009, + intel_gpu_kbl = 0x02404009, + intel_gpu_cfl = 0x02408009, + intel_gpu_apl = 0x0240c000, intel_gpu_bxt = intel_gpu_apl, - intel_gpu_glk, - intel_gpu_whl, - intel_gpu_aml, - intel_gpu_cml, - intel_gpu_icllp, - intel_gpu_ehl, + intel_gpu_glk = 0x02410000, + intel_gpu_whl = 0x02414000, + intel_gpu_aml = 0x02418000, + intel_gpu_cml = 0x0241c000, + intel_gpu_icllp = 0x02c00000, + intel_gpu_ehl = 0x02c08000, intel_gpu_jsl = intel_gpu_ehl, - intel_gpu_tgllp, - intel_gpu_rkl, - intel_gpu_adl_s, + intel_gpu_tgllp = 0x03000000, + intel_gpu_rkl = 0x03004000, + intel_gpu_adl_s = 0x03008000, intel_gpu_rpl_s = intel_gpu_adl_s, - intel_gpu_adl_p, - intel_gpu_adl_n, - intel_gpu_dg1, - intel_gpu_acm_g10, + intel_gpu_adl_p = 0x0300c000, + intel_gpu_adl_n = 0x03010000, + intel_gpu_dg1 = 0x03028000, + intel_gpu_acm_g10 = 0x030dc008, intel_gpu_dg2_g10 = intel_gpu_acm_g10, - intel_gpu_acm_g11, + intel_gpu_acm_g11 = 0x030e0005, intel_gpu_dg2_g11 = intel_gpu_acm_g11, - intel_gpu_acm_g12, + intel_gpu_acm_g12 = 0x030e4000, intel_gpu_dg2_g12 = intel_gpu_acm_g12, - intel_gpu_pvc, - intel_gpu_pvc_vg, + intel_gpu_pvc = 0x030f0007, + intel_gpu_pvc_vg = 0x030f4007, + // // NVIDIA architectures - nvidia_gpu_sm_50, - nvidia_gpu_sm_52, - nvidia_gpu_sm_53, - nvidia_gpu_sm_60, - nvidia_gpu_sm_61, - nvidia_gpu_sm_62, - nvidia_gpu_sm_70, - nvidia_gpu_sm_72, - nvidia_gpu_sm_75, - nvidia_gpu_sm_80, - nvidia_gpu_sm_86, - nvidia_gpu_sm_87, - nvidia_gpu_sm_89, - nvidia_gpu_sm_90, + // + // The requirement for the unique ID for nvidia_gpu_* architectures below is: + // - the ID must start with NVIDIA's vendor ID from underlied backend (which + // is 0x10de) + // - the ID must end with SM version ID of that architecture + nvidia_gpu_sm_50 = 0x10de0050, + nvidia_gpu_sm_52 = 0x10de0052, + nvidia_gpu_sm_53 = 0x10de0053, + nvidia_gpu_sm_60 = 0x10de0060, + nvidia_gpu_sm_61 = 0x10de0061, + nvidia_gpu_sm_62 = 0x10de0062, + nvidia_gpu_sm_70 = 0x10de0070, + nvidia_gpu_sm_72 = 0x10de0072, + nvidia_gpu_sm_75 = 0x10de0075, + nvidia_gpu_sm_80 = 0x10de0080, + nvidia_gpu_sm_86 = 0x10de0086, + nvidia_gpu_sm_87 = 0x10de0087, + nvidia_gpu_sm_89 = 0x10de0089, + nvidia_gpu_sm_90 = 0x10de0090, + // // AMD architectures - amd_gpu_gfx700, - amd_gpu_gfx701, - amd_gpu_gfx702, - amd_gpu_gfx801, - amd_gpu_gfx802, - amd_gpu_gfx803, - amd_gpu_gfx805, - amd_gpu_gfx810, - amd_gpu_gfx900, - amd_gpu_gfx902, - amd_gpu_gfx904, - amd_gpu_gfx906, - amd_gpu_gfx908, - amd_gpu_gfx909, - amd_gpu_gfx90a, - amd_gpu_gfx90c, - amd_gpu_gfx940, - amd_gpu_gfx941, - amd_gpu_gfx942, - amd_gpu_gfx1010, - amd_gpu_gfx1011, - amd_gpu_gfx1012, - amd_gpu_gfx1013, - amd_gpu_gfx1030, - amd_gpu_gfx1031, - amd_gpu_gfx1032, - amd_gpu_gfx1033, - amd_gpu_gfx1034, - amd_gpu_gfx1035, - amd_gpu_gfx1036, - amd_gpu_gfx1100, - amd_gpu_gfx1101, - amd_gpu_gfx1102, - amd_gpu_gfx1103, - amd_gpu_gfx1150, - amd_gpu_gfx1151, - amd_gpu_gfx1200, - amd_gpu_gfx1201, - // Update "detail::max_architecture" below if you add new elements here! + // + // The requirement for the unique ID for amd_gpu_* architectures below is: + // - the ID must start with AMD's vendor ID from underlied backend (which is + // 0x1002) + // - the ID must end with GFX version ID of that architecture + amd_gpu_gfx700 = 0x10020700, + amd_gpu_gfx701 = 0x10020701, + amd_gpu_gfx702 = 0x10020702, + amd_gpu_gfx801 = 0x10020801, + amd_gpu_gfx802 = 0x10020802, + amd_gpu_gfx803 = 0x10020803, + amd_gpu_gfx805 = 0x10020805, + amd_gpu_gfx810 = 0x10020810, + amd_gpu_gfx900 = 0x10020900, + amd_gpu_gfx902 = 0x10020902, + amd_gpu_gfx904 = 0x10020904, + amd_gpu_gfx906 = 0x10020906, + amd_gpu_gfx908 = 0x10020908, + amd_gpu_gfx909 = 0x10020909, + amd_gpu_gfx90a = 0x1002090a, + amd_gpu_gfx90c = 0x1002090c, + amd_gpu_gfx940 = 0x10020940, + amd_gpu_gfx941 = 0x10020941, + amd_gpu_gfx942 = 0x10020942, + amd_gpu_gfx1010 = 0x10021010, + amd_gpu_gfx1011 = 0x10021011, + amd_gpu_gfx1012 = 0x10021012, + amd_gpu_gfx1013 = 0x10021013, + amd_gpu_gfx1030 = 0x10021030, + amd_gpu_gfx1031 = 0x10021031, + amd_gpu_gfx1032 = 0x10021032, + amd_gpu_gfx1033 = 0x10021033, + amd_gpu_gfx1034 = 0x10021034, + amd_gpu_gfx1035 = 0x10021035, + amd_gpu_gfx1036 = 0x10021036, + amd_gpu_gfx1100 = 0x10021100, + amd_gpu_gfx1101 = 0x10021101, + amd_gpu_gfx1102 = 0x10021102, + amd_gpu_gfx1103 = 0x10021103, + amd_gpu_gfx1150 = 0x10021150, + amd_gpu_gfx1151 = 0x10021151, + amd_gpu_gfx1200 = 0x10021200, + amd_gpu_gfx1201 = 0x10021201, intel_gpu_8_0_0 = intel_gpu_bdw, intel_gpu_9_0_9 = intel_gpu_skl, intel_gpu_9_1_9 = intel_gpu_kbl, @@ -124,12 +153,37 @@ enum class architecture { intel_gpu_12_10_0 = intel_gpu_dg1, }; +enum class arch_category { + intel_gpu = 0, + nvidia_gpu = 1, + amd_gpu = 2, + // TODO: add intel_cpu +}; + } // namespace ext::oneapi::experimental namespace detail { -static constexpr ext::oneapi::experimental::architecture max_architecture = - ext::oneapi::experimental::architecture::amd_gpu_gfx1201; +static constexpr ext::oneapi::experimental::architecture + min_intel_gpu_architecture = + ext::oneapi::experimental::architecture::intel_gpu_bdw; +static constexpr ext::oneapi::experimental::architecture + max_intel_gpu_architecture = + ext::oneapi::experimental::architecture::intel_gpu_pvc_vg; + +static constexpr ext::oneapi::experimental::architecture + min_nvidia_gpu_architecture = + ext::oneapi::experimental::architecture::nvidia_gpu_sm_50; +static constexpr ext::oneapi::experimental::architecture + max_nvidia_gpu_architecture = + ext::oneapi::experimental::architecture::nvidia_gpu_sm_90; + +static constexpr ext::oneapi::experimental::architecture + min_amd_gpu_architecture = + ext::oneapi::experimental::architecture::amd_gpu_gfx700; +static constexpr ext::oneapi::experimental::architecture + max_amd_gpu_architecture = + ext::oneapi::experimental::architecture::amd_gpu_gfx1201; #ifndef __SYCL_TARGET_INTEL_X86_64__ #define __SYCL_TARGET_INTEL_X86_64__ 0 @@ -439,167 +493,247 @@ static constexpr bool is_allowable_aot_mode = (__SYCL_TARGET_AMD_GPU_GFX1200__ == 1) || (__SYCL_TARGET_AMD_GPU_GFX1201__ == 1); -struct IsAOTForArchitectureClass { - // Allocate an array of size == size of - // ext::oneapi::experimental::architecture enum. - bool arr[static_cast(max_architecture) + 1]; - - using arch = ext::oneapi::experimental::architecture; - - constexpr IsAOTForArchitectureClass() : arr() { - arr[static_cast(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1; - arr[static_cast(arch::intel_gpu_bdw)] = - __SYCL_TARGET_INTEL_GPU_BDW__ == 1; - arr[static_cast(arch::intel_gpu_skl)] = - __SYCL_TARGET_INTEL_GPU_SKL__ == 1; - arr[static_cast(arch::intel_gpu_kbl)] = - __SYCL_TARGET_INTEL_GPU_KBL__ == 1; - arr[static_cast(arch::intel_gpu_cfl)] = - __SYCL_TARGET_INTEL_GPU_CFL__ == 1; - arr[static_cast(arch::intel_gpu_apl)] = - __SYCL_TARGET_INTEL_GPU_APL__ == 1; - arr[static_cast(arch::intel_gpu_glk)] = - __SYCL_TARGET_INTEL_GPU_GLK__ == 1; - arr[static_cast(arch::intel_gpu_whl)] = - __SYCL_TARGET_INTEL_GPU_WHL__ == 1; - arr[static_cast(arch::intel_gpu_aml)] = - __SYCL_TARGET_INTEL_GPU_AML__ == 1; - arr[static_cast(arch::intel_gpu_cml)] = - __SYCL_TARGET_INTEL_GPU_CML__ == 1; - arr[static_cast(arch::intel_gpu_icllp)] = - __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1; - arr[static_cast(arch::intel_gpu_ehl)] = - __SYCL_TARGET_INTEL_GPU_EHL__ == 1; - arr[static_cast(arch::intel_gpu_tgllp)] = - __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1; - arr[static_cast(arch::intel_gpu_rkl)] = - __SYCL_TARGET_INTEL_GPU_RKL__ == 1; - arr[static_cast(arch::intel_gpu_adl_s)] = - __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1; - arr[static_cast(arch::intel_gpu_adl_p)] = - __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1; - arr[static_cast(arch::intel_gpu_adl_n)] = - __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1; - arr[static_cast(arch::intel_gpu_dg1)] = - __SYCL_TARGET_INTEL_GPU_DG1__ == 1; - arr[static_cast(arch::intel_gpu_acm_g10)] = - __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1; - arr[static_cast(arch::intel_gpu_acm_g11)] = - __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1; - arr[static_cast(arch::intel_gpu_acm_g12)] = - __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1; - arr[static_cast(arch::intel_gpu_pvc)] = - __SYCL_TARGET_INTEL_GPU_PVC__ == 1; - arr[static_cast(arch::intel_gpu_pvc_vg)] = - __SYCL_TARGET_INTEL_GPU_PVC_VG__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_50)] = - __SYCL_TARGET_NVIDIA_GPU_SM50__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_52)] = - __SYCL_TARGET_NVIDIA_GPU_SM52__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_53)] = - __SYCL_TARGET_NVIDIA_GPU_SM53__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_60)] = - __SYCL_TARGET_NVIDIA_GPU_SM60__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_61)] = - __SYCL_TARGET_NVIDIA_GPU_SM61__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_62)] = - __SYCL_TARGET_NVIDIA_GPU_SM62__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_70)] = - __SYCL_TARGET_NVIDIA_GPU_SM70__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_72)] = - __SYCL_TARGET_NVIDIA_GPU_SM72__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_75)] = - __SYCL_TARGET_NVIDIA_GPU_SM75__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_80)] = - __SYCL_TARGET_NVIDIA_GPU_SM80__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_86)] = - __SYCL_TARGET_NVIDIA_GPU_SM86__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_87)] = - __SYCL_TARGET_NVIDIA_GPU_SM87__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_89)] = - __SYCL_TARGET_NVIDIA_GPU_SM89__ == 1; - arr[static_cast(arch::nvidia_gpu_sm_90)] = - __SYCL_TARGET_NVIDIA_GPU_SM90__ == 1; - arr[static_cast(arch::amd_gpu_gfx700)] = - __SYCL_TARGET_AMD_GPU_GFX700__ == 1; - arr[static_cast(arch::amd_gpu_gfx701)] = - __SYCL_TARGET_AMD_GPU_GFX701__ == 1; - arr[static_cast(arch::amd_gpu_gfx702)] = - __SYCL_TARGET_AMD_GPU_GFX702__ == 1; - arr[static_cast(arch::amd_gpu_gfx801)] = - __SYCL_TARGET_AMD_GPU_GFX801__ == 1; - arr[static_cast(arch::amd_gpu_gfx802)] = - __SYCL_TARGET_AMD_GPU_GFX802__ == 1; - arr[static_cast(arch::amd_gpu_gfx803)] = - __SYCL_TARGET_AMD_GPU_GFX803__ == 1; - arr[static_cast(arch::amd_gpu_gfx805)] = - __SYCL_TARGET_AMD_GPU_GFX805__ == 1; - arr[static_cast(arch::amd_gpu_gfx810)] = - __SYCL_TARGET_AMD_GPU_GFX810__ == 1; - arr[static_cast(arch::amd_gpu_gfx900)] = - __SYCL_TARGET_AMD_GPU_GFX900__ == 1; - arr[static_cast(arch::amd_gpu_gfx902)] = - __SYCL_TARGET_AMD_GPU_GFX902__ == 1; - arr[static_cast(arch::amd_gpu_gfx904)] = - __SYCL_TARGET_AMD_GPU_GFX904__ == 1; - arr[static_cast(arch::amd_gpu_gfx906)] = - __SYCL_TARGET_AMD_GPU_GFX906__ == 1; - arr[static_cast(arch::amd_gpu_gfx908)] = - __SYCL_TARGET_AMD_GPU_GFX908__ == 1; - arr[static_cast(arch::amd_gpu_gfx909)] = - __SYCL_TARGET_AMD_GPU_GFX909__ == 1; - arr[static_cast(arch::amd_gpu_gfx90a)] = - __SYCL_TARGET_AMD_GPU_GFX90A__ == 1; - arr[static_cast(arch::amd_gpu_gfx90c)] = - __SYCL_TARGET_AMD_GPU_GFX90C__ == 1; - arr[static_cast(arch::amd_gpu_gfx940)] = - __SYCL_TARGET_AMD_GPU_GFX940__ == 1; - arr[static_cast(arch::amd_gpu_gfx941)] = - __SYCL_TARGET_AMD_GPU_GFX941__ == 1; - arr[static_cast(arch::amd_gpu_gfx942)] = - __SYCL_TARGET_AMD_GPU_GFX942__ == 1; - arr[static_cast(arch::amd_gpu_gfx1010)] = - __SYCL_TARGET_AMD_GPU_GFX1010__ == 1; - arr[static_cast(arch::amd_gpu_gfx1011)] = - __SYCL_TARGET_AMD_GPU_GFX1011__ == 1; - arr[static_cast(arch::amd_gpu_gfx1012)] = - __SYCL_TARGET_AMD_GPU_GFX1012__ == 1; - arr[static_cast(arch::amd_gpu_gfx1030)] = - __SYCL_TARGET_AMD_GPU_GFX1030__ == 1; - arr[static_cast(arch::amd_gpu_gfx1031)] = - __SYCL_TARGET_AMD_GPU_GFX1031__ == 1; - arr[static_cast(arch::amd_gpu_gfx1032)] = - __SYCL_TARGET_AMD_GPU_GFX1032__ == 1; - arr[static_cast(arch::amd_gpu_gfx1033)] = - __SYCL_TARGET_AMD_GPU_GFX1033__ == 1; - arr[static_cast(arch::amd_gpu_gfx1034)] = - __SYCL_TARGET_AMD_GPU_GFX1034__ == 1; - arr[static_cast(arch::amd_gpu_gfx1035)] = - __SYCL_TARGET_AMD_GPU_GFX1035__ == 1; - arr[static_cast(arch::amd_gpu_gfx1036)] = - __SYCL_TARGET_AMD_GPU_GFX1036__ == 1; - arr[static_cast(arch::amd_gpu_gfx1100)] = - __SYCL_TARGET_AMD_GPU_GFX1100__ == 1; - arr[static_cast(arch::amd_gpu_gfx1101)] = - __SYCL_TARGET_AMD_GPU_GFX1101__ == 1; - arr[static_cast(arch::amd_gpu_gfx1102)] = - __SYCL_TARGET_AMD_GPU_GFX1102__ == 1; - arr[static_cast(arch::amd_gpu_gfx1103)] = - __SYCL_TARGET_AMD_GPU_GFX1103__ == 1; - arr[static_cast(arch::amd_gpu_gfx1150)] = - __SYCL_TARGET_AMD_GPU_GFX1150__ == 1; - arr[static_cast(arch::amd_gpu_gfx1151)] = - __SYCL_TARGET_AMD_GPU_GFX1151__ == 1; - arr[static_cast(arch::amd_gpu_gfx1200)] = - __SYCL_TARGET_AMD_GPU_GFX1200__ == 1; - arr[static_cast(arch::amd_gpu_gfx1201)] = - __SYCL_TARGET_AMD_GPU_GFX1201__ == 1; - } -}; +constexpr static std::optional +get_current_architecture_aot() { + // TODO: re-write the logic below when sycl_ext_oneapi_device_architecture will + // support -fsycl-targets will targets more than one +#if __SYCL_TARGET_INTEL_X86_64__ + return ext::oneapi::experimental::architecture::x86_64; +#endif +#if __SYCL_TARGET_INTEL_GPU_BDW__ + return ext::oneapi::experimental::architecture::intel_gpu_bdw; +#endif +#if __SYCL_TARGET_INTEL_GPU_SKL__ + return ext::oneapi::experimental::architecture::intel_gpu_skl; +#endif +#if __SYCL_TARGET_INTEL_GPU_KBL__ + return ext::oneapi::experimental::architecture::intel_gpu_kbl; +#endif +#if __SYCL_TARGET_INTEL_GPU_CFL__ + return ext::oneapi::experimental::architecture::intel_gpu_cfl; +#endif +#if __SYCL_TARGET_INTEL_GPU_APL__ + return ext::oneapi::experimental::architecture::intel_gpu_apl; +#endif +#if __SYCL_TARGET_INTEL_GPU_GLK__ + return ext::oneapi::experimental::architecture::intel_gpu_glk; +#endif +#if __SYCL_TARGET_INTEL_GPU_WHL__ + return ext::oneapi::experimental::architecture::intel_gpu_whl; +#endif +#if __SYCL_TARGET_INTEL_GPU_AML__ + return ext::oneapi::experimental::architecture::intel_gpu_aml; +#endif +#if __SYCL_TARGET_INTEL_GPU_CML__ + return ext::oneapi::experimental::architecture::intel_gpu_cml; +#endif +#if __SYCL_TARGET_INTEL_GPU_ICLLP__ + return ext::oneapi::experimental::architecture::intel_gpu_icllp; +#endif +#if __SYCL_TARGET_INTEL_GPU_EHL__ + return ext::oneapi::experimental::architecture::intel_gpu_ehl; +#endif +#if __SYCL_TARGET_INTEL_GPU_TGLLP__ + return ext::oneapi::experimental::architecture::intel_gpu_tgllp; +#endif +#if __SYCL_TARGET_INTEL_GPU_RKL__ + return ext::oneapi::experimental::architecture::intel_gpu_rkl; +#endif +#if __SYCL_TARGET_INTEL_GPU_ADL_S__ + return ext::oneapi::experimental::architecture::intel_gpu_adl_s; +#endif +#if __SYCL_TARGET_INTEL_GPU_ADL_P__ + return ext::oneapi::experimental::architecture::intel_gpu_adl_p; +#endif +#if __SYCL_TARGET_INTEL_GPU_ADL_P__ + return ext::oneapi::experimental::architecture::intel_gpu_adl_p; +#endif +#if __SYCL_TARGET_INTEL_GPU_ADL_N__ + return ext::oneapi::experimental::architecture::intel_gpu_adl_n; +#endif +#if __SYCL_TARGET_INTEL_GPU_DG1__ + return ext::oneapi::experimental::architecture::intel_gpu_dg1; +#endif +#if __SYCL_TARGET_INTEL_GPU_ACM_G10__ + return ext::oneapi::experimental::architecture::intel_gpu_acm_g10; +#endif +#if __SYCL_TARGET_INTEL_GPU_ACM_G11__ + return ext::oneapi::experimental::architecture::intel_gpu_acm_g11; +#endif +#if __SYCL_TARGET_INTEL_GPU_ACM_G12__ + return ext::oneapi::experimental::architecture::intel_gpu_acm_g12; +#endif +#if __SYCL_TARGET_INTEL_GPU_PVC__ + return ext::oneapi::experimental::architecture::intel_gpu_pvc; +#endif +#if __SYCL_TARGET_INTEL_GPU_PVC_VG__ + return ext::oneapi::experimental::architecture::intel_gpu_pvc_vg; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM50__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_50; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM52__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_52; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM53__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_53; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM60__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_60; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM61__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_61; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM62__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_62; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM70__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_70; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM72__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_72; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM75__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_75; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM80__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_80; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM86__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_86; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM87__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_87; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM89__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_89; +#endif +#if __SYCL_TARGET_NVIDIA_GPU_SM90__ + return ext::oneapi::experimental::architecture::nvidia_gpu_sm_90; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX700__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx700; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX701__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx701; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX702__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx702; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX801__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx801; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX802__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx802; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX803__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx803; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX805__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx805; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX810__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx810; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX900__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx900; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX902__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx902; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX904__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx904; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX906__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx906; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX908__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx908; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX909__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx909; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX90a__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx90a; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX90c__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx90c; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX940__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx940; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX941__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx941; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX942__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx942; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1010__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1010; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1011__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1011; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1012__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1012; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1030__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1030; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1031__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1031; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1032__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1032; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1033__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1033; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1034__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1034; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1035__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1035; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1036__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1036; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1100__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1100; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1101__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1101; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1102__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1102; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1103__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1103; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1150__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1150; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1151__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1151; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1200__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1200; +#endif +#if __SYCL_TARGET_AMD_GPU_GFX1201__ + return ext::oneapi::experimental::architecture::amd_gpu_gfx1201; +#endif + return std::nullopt; +} -// One entry for each enumerator in "architecture" telling whether the AOT -// target matches that architecture. -static constexpr IsAOTForArchitectureClass is_aot_for_architecture; +// Tells if the AOT target matches that architecture. +constexpr static bool +is_aot_for_architecture(ext::oneapi::experimental::architecture arch) { + constexpr std::optional + current_arch = get_current_architecture_aot(); + if (current_arch.has_value()) + return static_cast(arch) == static_cast(*current_arch); + return false; +} // Reads the value of "is_allowable_aot_mode" via a template to defer triggering // static_assert() until template instantiation time. @@ -612,28 +746,249 @@ constexpr static bool allowable_aot_mode() { // pack. template constexpr static bool device_architecture_is() { - return (is_aot_for_architecture.arr[static_cast(Archs)] || ...); + return (is_aot_for_architecture(Archs) || ...); +} + +static constexpr std::optional +get_category_min_architecture( + ext::oneapi::experimental::arch_category Category) { + if (Category == ext::oneapi::experimental::arch_category::intel_gpu) { + return min_intel_gpu_architecture; + } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) { + return min_nvidia_gpu_architecture; + } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) { + return min_amd_gpu_architecture; + } // add "else if " when adding new category, "else" not needed + return std::nullopt; +} + +static constexpr std::optional +get_category_max_architecture( + ext::oneapi::experimental::arch_category Category) { + if (Category == ext::oneapi::experimental::arch_category::intel_gpu) { + return max_intel_gpu_architecture; + } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) { + return max_nvidia_gpu_architecture; + } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) { + return max_amd_gpu_architecture; + } // add "else if " when adding new category, "else" not needed + return std::nullopt; +} + +template +constexpr static bool device_architecture_is_in_category_aot() { + constexpr std::optional + category_min_arch = get_category_min_architecture(Category); + constexpr std::optional + category_max_arch = get_category_max_architecture(Category); + constexpr std::optional + current_arch = get_current_architecture_aot(); + + if (category_min_arch.has_value() && category_max_arch.has_value() && + current_arch.has_value()) + if ((static_cast(*category_min_arch) <= + static_cast(*current_arch)) && + (static_cast(*current_arch) <= + static_cast(*category_max_arch))) + return true; + + return false; +} + +template +constexpr static bool device_architecture_is_in_categories() { + return (device_architecture_is_in_category_aot() || ...); } -// Helper object used to implement "else_if_architecture_is" and "otherwise". -// The "MakeCall" template parameter tells whether a previous clause in the -// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some -// previous clause was true, so none of the subsequent -// "else_if_architecture_is" or "otherwise" member functions should call the -// user's function. +constexpr static std::optional +get_architecture_category(ext::oneapi::experimental::architecture arch) { + auto arch_is_in_segment = + [&arch](ext::oneapi::experimental::architecture min, + ext::oneapi::experimental::architecture max) { + if ((static_cast(min) <= static_cast(arch)) && + (static_cast(arch) <= static_cast(max))) + return true; + return false; + }; + + if (arch_is_in_segment(min_intel_gpu_architecture, + max_intel_gpu_architecture)) + return ext::oneapi::experimental::arch_category::intel_gpu; + if (arch_is_in_segment(min_nvidia_gpu_architecture, + max_nvidia_gpu_architecture)) + return ext::oneapi::experimental::arch_category::nvidia_gpu; + if (arch_is_in_segment(min_amd_gpu_architecture, max_amd_gpu_architecture)) + return ext::oneapi::experimental::arch_category::amd_gpu; + // add "if " when adding new category + + return std::nullopt; +} + +template +constexpr static bool device_architecture_comparison_aot(Compare comp) { + constexpr std::optional + input_arch_category = get_architecture_category(Arch); + constexpr std::optional + current_arch = get_current_architecture_aot(); + + if (input_arch_category.has_value() && current_arch.has_value()) { + std::optional + current_arch_category = get_architecture_category(*current_arch); + if (current_arch_category.has_value() && + (*input_arch_category == *current_arch_category)) + return comp(*current_arch, Arch); + } + return false; +} + +// Helper object used to implement "else_if_architecture_is", +// "else_if_architecture_is_*" and "otherwise". The "MakeCall" template +// parameter tells whether a previous clause in the "if-elseif-elseif ..." chain +// was true. When "MakeCall" is false, some previous clause was true, so none +// of the subsequent "else_if_architecture_is", "else_if_architecture_is_*" or +// "otherwise" member functions should call the user's function. template class if_architecture_helper { public: + /// The condition is `true` only if the object F comes from a previous call + /// whose associated condition is `false` *and* if the device which executes + /// the `else_if_architecture_is` function has any one of the architectures + /// listed in the @tparam Archs parameter pack. template - constexpr auto else_if_architecture_is(T fnTrue) { + constexpr auto else_if_architecture_is(T fn) { if constexpr (MakeCall && device_architecture_is()) { - fnTrue(); + fn(); return if_architecture_helper{}; } else { - (void)fnTrue; + (void)fn; return if_architecture_helper{}; } } + /// The condition is `true` only if the object F comes from a previous call + /// whose associated condition is `false` *and* if the device which executes + /// the `else_if_architecture_is` function has an architecture that is in any + /// one of the categories listed in the @tparam Categories pack. + template + constexpr auto else_if_architecture_is(T fn) { + if constexpr (MakeCall && + device_architecture_is_in_categories()) { + fn(); + return if_architecture_helper{}; + } else { + (void)fn; + return if_architecture_helper{}; + } + } + + /// The condition is `true` only if the object F comes from a previous call + /// whose associated condition is `false` *and* if the device which executes + /// the `else_if_architecture_is_lt` function has an architecture that is in + /// the same family as @tparam Arch and compares less than @tparam Arch. + template + constexpr auto else_if_architecture_is_lt(T fn) { + if constexpr (MakeCall && + sycl::detail::device_architecture_comparison_aot( + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) { + return a < b; + })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } + } + + /// The condition is `true` only if the object F comes from a previous call + /// whose associated condition is `false` *and* if the device which executes + /// the `else_if_architecture_is_le` function has an architecture that is in + /// the same family as @tparam Arch and compares less than or equal to @tparam + /// Arch. + template + constexpr auto else_if_architecture_is_le(T fn) { + if constexpr (MakeCall && + sycl::detail::device_architecture_comparison_aot( + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) { + return a <= b; + })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } + } + + /// The condition is `true` only if the object F comes from a previous call + /// whose associated condition is `false` *and* if the device which executes + /// the `else_if_architecture_is_gt` function has an architecture that is in + /// the same family as @tparam Arch and compares greater than @tparam Arch. + template + constexpr auto else_if_architecture_is_gt(T fn) { + if constexpr (MakeCall && + sycl::detail::device_architecture_comparison_aot( + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) { + return a > b; + })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } + } + + /// The condition is `true` only if the object F comes from a previous call + /// whose associated condition is `false` *and* if the device which executes + /// the `else_if_architecture_is_ge` function has an architecture that is in + /// the same family as @tparam Arch and compares greater than or equal to + /// @tparam Arch. + template + constexpr auto else_if_architecture_is_ge(T fn) { + if constexpr (MakeCall && + sycl::detail::device_architecture_comparison_aot( + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) { + return a >= b; + })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } + } + + /// The condition is `true` only if the object F comes from a previous call + /// whose associated condition is `false` *and* if the device which executes + /// the `else_if_architecture_is_between` function has an architecture that is + /// in the same family as @tparam Arch1 and is greater than or equal to + /// @tparam Arch1 and is less than or equal to @tparam Arch2. + template + constexpr auto else_if_architecture_is_between(T fn) { + if constexpr (MakeCall && + sycl::detail::device_architecture_comparison_aot( + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) { + return a >= b; + }) && + sycl::detail::device_architecture_comparison_aot( + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) { + return a <= b; + })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } + } + template constexpr void otherwise(T fn) { if constexpr (MakeCall) { fn(); @@ -644,17 +999,113 @@ template class if_architecture_helper { namespace ext::oneapi::experimental { +/// The condition is `true` only if the device which executes the +/// `if_architecture_is` function has any one of the architectures listed in the +/// @tparam Archs pack. template -constexpr static auto if_architecture_is(T fnTrue) { +constexpr static auto if_architecture_is(T fn) { static_assert(sycl::detail::allowable_aot_mode(), "The if_architecture_is function may only be used when AOT " "compiling with '-fsycl-targets=spir64_x86_64' or " "'-fsycl-targets=*_gpu_*'"); if constexpr (sycl::detail::device_architecture_is()) { - fnTrue(); + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } +} + +/// The condition is `true` only if the device which executes the +/// `if_architecture_is` function has an architecture that is in any one of the +/// categories listed in the @tparam Categories pack. +template +constexpr static auto if_architecture_is(T fn) { + if constexpr (sycl::detail::device_architecture_is_in_categories< + Categories...>()) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } +} + +/// The condition is `true` only if the device which executes the +/// `if_architecture_is_lt` function has an architecture that is in the same +/// family as @tparam Arch and compares less than @tparam Arch. +template +constexpr static auto if_architecture_is_lt(T fn) { + if constexpr (sycl::detail::device_architecture_comparison_aot( + [](architecture a, architecture b) { return a < b; })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } +} + +/// The condition is `true` only if the device which executes the +/// `if_architecture_is_le` function has an architecture that is in the same +/// family as @tparam Arch and compares less than or equal to @tparam Arch. +template +constexpr static auto if_architecture_is_le(T fn) { + if constexpr (sycl::detail::device_architecture_comparison_aot( + [](architecture a, architecture b) { return a <= b; })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } +} + +/// The condition is `true` only if the device which executes the +/// `if_architecture_is_gt` function has an architecture that is in the same +/// family as @tparam Arch and compares greater than @tparam Arch. +template +constexpr static auto if_architecture_is_gt(T fn) { + if constexpr (sycl::detail::device_architecture_comparison_aot( + [](architecture a, architecture b) { return a > b; })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } +} + +/// The condition is `true` only if the device which executes the +/// `if_architecture_is_ge` function has an architecture that is in the same +/// family as @tparam Arch and compares greater than or equal to @tparam Arch. +template +constexpr static auto if_architecture_is_ge(T fn) { + if constexpr (sycl::detail::device_architecture_comparison_aot( + [](architecture a, architecture b) { return a >= b; })) { + fn(); + return sycl::detail::if_architecture_helper{}; + } else { + (void)fn; + return sycl::detail::if_architecture_helper{}; + } +} + +/// The condition is `true` only if the device which executes the +/// `if_architecture_is_between` function has an architecture that is in the +/// same family as @tparam Arch1 and is greater than or equal to @tparam +/// Arch1 and is less than or equal to @tparam Arch2. +template +constexpr static auto if_architecture_is_between(T fn) { + if constexpr (sycl::detail::device_architecture_comparison_aot( + [](architecture a, architecture b) { return a >= b; }) && + sycl::detail::device_architecture_comparison_aot( + [](architecture a, architecture b) { return a <= b; })) { + fn(); return sycl::detail::if_architecture_helper{}; } else { - (void)fnTrue; + (void)fn; return sycl::detail::if_architecture_helper{}; } } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 8ecaa6a2bfc62..7771799ff7db7 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -239,6 +240,18 @@ class device_impl { return Arch == getDeviceArch(); } + bool extOneapiArchitectureIs( + ext::oneapi::experimental::arch_category Category) const { + std::optional CategoryMinArch = + get_category_min_architecture(Category); + std::optional CategoryMaxArch = + get_category_max_architecture(Category); + if (CategoryMinArch.has_value() && CategoryMaxArch.has_value()) + return CategoryMinArch <= getDeviceArch() && + getDeviceArch() <= CategoryMaxArch; + return false; + } + /// Gets the current device timestamp /// @throw sycl::feature_not_supported if feature is not supported on device uint64_t getCurrentDeviceTime(); diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 73a05080c5b0d..0d39bc825d82b 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -257,10 +257,16 @@ bool device::ext_oneapi_can_access_peer(const device &peer, return value == 1; } + bool device::ext_oneapi_architecture_is( ext::oneapi::experimental::architecture arch) { return impl->extOneapiArchitectureIs(arch); } +bool device::ext_oneapi_architecture_is( + ext::oneapi::experimental::arch_category category) { + return impl->extOneapiArchitectureIs(category); +} + } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp new file mode 100644 index 0000000000000..cb1214395f8e4 --- /dev/null +++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp @@ -0,0 +1,199 @@ +// REQUIRES: gpu, gpu-intel-pvc +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc %s -o %t.out +// RUN: %{run} %t.out + +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; + +int main() { + int *result; + int N = 29; + + { + queue q(gpu_selector_v); + + result = (int *)malloc_shared(N * sizeof(int), q); + + q.submit([&](handler &cgh) { cgh.fill(result, 0, N); }).wait(); + + q.submit([&](handler &cgh) { + cgh.single_task([=]() { + // test if_architecture_is with category + if_architecture_is([&]() { + result[0] = 1; + }).otherwise([&]() {}); + + // negative test if_architecture_is with category + if_architecture_is([&]() {}).otherwise( + [&]() { result[1] = 1; }); + + // test else_if_architecture_is with category - 1 + if_architecture_is([&]() {}) + .else_if_architecture_is( + [&]() { result[2] = 1; }) + .otherwise([&]() {}); + + // test else_if_architecture_is with category - 2 + if_architecture_is([&]() {}) + .else_if_architecture_is([&]() {}) + .else_if_architecture_is( + [&]() { result[3] = 1; }) + .otherwise([&]() {}); + + // test if_architecture_is_lt + if_architecture_is_lt([&]() { + result[4] = 1; + }).otherwise([&]() {}); + + // negative test if_architecture_is_lt - 1 + if_architecture_is_lt([&]() {}).otherwise( + [&]() { result[5] = 1; }); + + // negative test if_architecture_is_lt - 2 + if_architecture_is_lt([&]() { + }).otherwise([&]() { result[6] = 1; }); + + // test else_if_architecture_is_lt - 1 + if_architecture_is_lt([&]() {}) + .else_if_architecture_is_lt( + [&]() { result[7] = 1; }) + .otherwise([&]() {}); + + // test else_if_architecture_is_lt - 2 + if_architecture_is_lt([&]() { + }).else_if_architecture_is_lt([&]() {}) + .else_if_architecture_is_lt( + [&]() { result[8] = 1; }) + .otherwise([&]() {}); + + // test if_architecture_is_le + if_architecture_is_le([&]() { + result[9] = 1; + }).otherwise([&]() {}); + + // negative test if_architecture_is_le - 1 + if_architecture_is_le([&]() {}).otherwise( + [&]() { result[10] = 1; }); + + // negative test if_architecture_is_le - 2 + if_architecture_is_le([&]() { + }).otherwise([&]() { result[11] = 1; }); + + // test else_if_architecture_is_le - 1 + if_architecture_is_le([&]() {}) + .else_if_architecture_is_le( + [&]() { result[12] = 1; }) + .otherwise([&]() {}); + + // test else_if_architecture_is_le - 2 + if_architecture_is_le([&]() { + }).else_if_architecture_is_le([&]() {}) + .else_if_architecture_is_le( + [&]() { result[13] = 1; }) + .otherwise([&]() {}); + + // test if_architecture_is_gt + if_architecture_is_gt([&]() { + result[14] = 1; + }).otherwise([&]() {}); + + // negative test if_architecture_is_gt - 1 + if_architecture_is_gt([&]() { + }).otherwise([&]() { result[15] = 1; }); + + // negative test if_architecture_is_gt - 2 + if_architecture_is_gt([&]() { + }).otherwise([&]() { result[16] = 1; }); + + // test else_if_architecture_is_gt - 1 + if_architecture_is_gt([&]() {}) + .else_if_architecture_is_gt( + [&]() { result[17] = 1; }) + .otherwise([&]() {}); + + // test else_if_architecture_is_gt - 2 + if_architecture_is_gt([&]() { + }).else_if_architecture_is_gt([&]() {}) + .else_if_architecture_is_gt( + [&]() { result[18] = 1; }) + .otherwise([&]() {}); + + // test if_architecture_is_ge + if_architecture_is_ge([&]() { + result[19] = 1; + }).otherwise([&]() {}); + + // negative test if_architecture_is_ge - 1 + if_architecture_is_ge([&]() { + }).otherwise([&]() { result[20] = 1; }); + + // negative test if_architecture_is_ge - 2 + if_architecture_is_ge([&]() { + }).otherwise([&]() { result[21] = 1; }); + + // test else_if_architecture_is_ge - 1 + if_architecture_is_ge([&]() {}) + .else_if_architecture_is_ge( + [&]() { result[22] = 1; }) + .otherwise([&]() {}); + + // test else_if_architecture_is_ge - 2 + if_architecture_is_ge([&]() { + }).else_if_architecture_is_ge([&]() {}) + .else_if_architecture_is_ge( + [&]() { result[23] = 1; }) + .otherwise([&]() {}); + + // test if_architecture_is_between + if_architecture_is_between([&]() { + result[24] = 1; + }).otherwise([&]() {}); + + // negative test if_architecture_is_between - 1 + if_architecture_is_between([&]() { + }).otherwise([&]() { result[25] = 1; }); + + // negative test if_architecture_is_between - 2 + if_architecture_is_between([&]() { + }).otherwise([&]() { result[26] = 1; }); + + // test else_if_architecture_is_between - 1 + if_architecture_is_between([&]() {}) + .else_if_architecture_is_between( + [&]() { result[27] = 1; }) + .otherwise([&]() {}); + + // test else_if_architecture_is_between - 2 + if_architecture_is_between([&]() {}) + .else_if_architecture_is_between( + [&]() {}) + .else_if_architecture_is_between( + [&]() { result[28] = 1; }) + .otherwise([&]() {}); + + // if adding new test here, don't forget to increment result's index and + // value of N variable + }); + }); + } + + bool failed = false; + for (int i = 0; i < N; i++) + if (result[i] != 1) { + std::cout << "Verification of the test " << i << " failed." << std::endl; + failed = true; + } + + return failed; +} diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp new file mode 100644 index 0000000000000..eafbc0f2aadad --- /dev/null +++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp @@ -0,0 +1,42 @@ +// REQUIRES: gpu + +// This test is written only for Intel architectures. It is expected that this +// test will fail on NVIDIA and AMD as the checks for ext_oneapi_architecture_is +// host API expect that device architecture is Intel GPU +// UNSUPPORTED: cuda, hip + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +int main() { + sycl::queue q; + sycl::device dev = q.get_device(); + + assert(dev.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu)); + assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::nvidia_gpu)); + assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::amd_gpu)); + + syclex::architecture intel_gpu_arch = syclex::architecture::intel_gpu_ehl; + assert(intel_gpu_arch < syclex::architecture::intel_gpu_pvc); + assert(intel_gpu_arch <= syclex::architecture::intel_gpu_pvc); + assert(intel_gpu_arch > syclex::architecture::intel_gpu_skl); + assert(intel_gpu_arch >= syclex::architecture::intel_gpu_ehl); + + syclex::architecture nvidia_gpu_arch = syclex::architecture::nvidia_gpu_sm_70; + assert(nvidia_gpu_arch < syclex::architecture::nvidia_gpu_sm_80); + assert(nvidia_gpu_arch <= syclex::architecture::nvidia_gpu_sm_80); + assert(nvidia_gpu_arch > syclex::architecture::nvidia_gpu_sm_53); + assert(nvidia_gpu_arch >= syclex::architecture::nvidia_gpu_sm_70); + + syclex::architecture amd_gpu_arch = syclex::architecture::amd_gpu_gfx908; + assert(amd_gpu_arch < syclex::architecture::amd_gpu_gfx1031); + assert(amd_gpu_arch <= syclex::architecture::amd_gpu_gfx1031); + assert(amd_gpu_arch > syclex::architecture::amd_gpu_gfx810); + assert(amd_gpu_arch >= syclex::architecture::amd_gpu_gfx908); + + return 0; +} From cce8b746819f54fc7784920ebcc8a8c5972a28d8 Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Sat, 17 Feb 2024 10:31:20 -0800 Subject: [PATCH 02/11] Cleanup --- .../experimental/device_architecture.hpp | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index dfc2de8665960..6e76be4dad8c6 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -495,8 +495,8 @@ static constexpr bool is_allowable_aot_mode = constexpr static std::optional get_current_architecture_aot() { - // TODO: re-write the logic below when sycl_ext_oneapi_device_architecture will - // support -fsycl-targets will targets more than one + // TODO: re-write the logic below when sycl_ext_oneapi_device_architecture + // will support targets more than one in -fsycl-targets #if __SYCL_TARGET_INTEL_X86_64__ return ext::oneapi::experimental::architecture::x86_64; #endif @@ -731,7 +731,7 @@ is_aot_for_architecture(ext::oneapi::experimental::architecture arch) { constexpr std::optional current_arch = get_current_architecture_aot(); if (current_arch.has_value()) - return static_cast(arch) == static_cast(*current_arch); + return arch == *current_arch; return false; } @@ -786,10 +786,8 @@ constexpr static bool device_architecture_is_in_category_aot() { if (category_min_arch.has_value() && category_max_arch.has_value() && current_arch.has_value()) - if ((static_cast(*category_min_arch) <= - static_cast(*current_arch)) && - (static_cast(*current_arch) <= - static_cast(*category_max_arch))) + if ((*category_min_arch <= *current_arch) && + (*current_arch <= *category_max_arch)) return true; return false; @@ -801,12 +799,11 @@ constexpr static bool device_architecture_is_in_categories() { } constexpr static std::optional -get_architecture_category(ext::oneapi::experimental::architecture arch) { +get_device_architecture_category(ext::oneapi::experimental::architecture arch) { auto arch_is_in_segment = [&arch](ext::oneapi::experimental::architecture min, ext::oneapi::experimental::architecture max) { - if ((static_cast(min) <= static_cast(arch)) && - (static_cast(arch) <= static_cast(max))) + if ((min <= arch) && (arch <= max)) return true; return false; }; @@ -827,13 +824,13 @@ get_architecture_category(ext::oneapi::experimental::architecture arch) { template constexpr static bool device_architecture_comparison_aot(Compare comp) { constexpr std::optional - input_arch_category = get_architecture_category(Arch); + input_arch_category = get_device_architecture_category(Arch); constexpr std::optional current_arch = get_current_architecture_aot(); if (input_arch_category.has_value() && current_arch.has_value()) { std::optional - current_arch_category = get_architecture_category(*current_arch); + current_arch_category = get_device_architecture_category(*current_arch); if (current_arch_category.has_value() && (*input_arch_category == *current_arch_category)) return comp(*current_arch, Arch); From a85f2620042d83234ffb4e693ad9bdac7539f63f Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Sat, 17 Feb 2024 11:39:27 -0800 Subject: [PATCH 03/11] Update ABI tests --- sycl/test/abi/sycl_symbols_linux.dump | 1 + sycl/test/abi/sycl_symbols_windows.dump | 71 +++++++++++++------------ 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index edc3d9b41a00d..f11795e9f7887 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4058,6 +4058,7 @@ _ZN4sycl3_V16detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_imp _ZN4sycl3_V16detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE _ZN4sycl3_V16device11get_devicesENS0_4info11device_typeE _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental12architectureE +_ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental13arch_categoryE _ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE _ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_ _ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_ diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 98be6da4c5f37..dbc384d50ff5e 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1012,6 +1012,7 @@ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z +?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXXZ ?ext_oneapi_can_access_peer@device@_V1@sycl@@QEAA_NAEBV123@W4peer_access@oneapi@ext@23@@Z @@ -1278,38 +1279,38 @@ ?get_max_statement_size@stream@_V1@sycl@@QEBA_KXZ ?get_max_statement_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVdevice@45@AEBVcontext@45@@Z -?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z -?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z -?get_name@kernel_id@_V1@sycl@@QEBAPEBDXZ -?get_node_from_event@node@experimental@oneapi@ext@_V1@sycl@@SA?AV123456@Vevent@56@@Z -?get_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -?get_num_channels@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAIXZ -?get_pipe_name@pipe_base@experimental@intel@ext@_V1@sycl@@KA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBX@Z -?get_pitch@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$01@34@XZ +?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z +?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z +?get_name@kernel_id@_V1@sycl@@QEBAPEBDXZ +?get_node_from_event@node@experimental@oneapi@ext@_V1@sycl@@SA?AV123456@Vevent@56@@Z +?get_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +?get_num_channels@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAIXZ +?get_pipe_name@pipe_base@experimental@intel@ext@_V1@sycl@@KA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBX@Z +?get_pitch@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$01@34@XZ ?get_pitch@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$01@34@XZ ?get_platform@context@_V1@sycl@@QEBA?AVplatform@23@XZ ?get_platform@device@_V1@sycl@@QEBA?AVplatform@23@XZ ?get_platforms@platform@_V1@sycl@@SA?AV?$vector@Vplatform@_V1@sycl@@V?$allocator@Vplatform@_V1@sycl@@@std@@@std@@XZ -?get_pointer_device@_V1@sycl@@YA?AVdevice@12@PEBXAEBVcontext@12@@Z -?get_pointer_type@_V1@sycl@@YA?AW4alloc@usm@12@PEBXAEBVcontext@12@@Z -?get_precision@stream@_V1@sycl@@QEBA_KXZ -?get_predecessors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ -?get_range@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$02@34@XZ -?get_range@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$range@$02@56@XZ -?get_range@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$02@34@XZ -?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -?get_size@image_plain@detail@_V1@sycl@@IEBA_KXZ -?get_size@stream@_V1@sycl@@QEBA_KXZ -?get_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ -?get_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBAXPEBDPEAX@Z -?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ -?get_successors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -?get_type@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4image_type@23456@XZ -?get_type@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4node_type@23456@XZ -?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ -?get_width@stream@_V1@sycl@@QEBA_KXZ -?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ +?get_pointer_device@_V1@sycl@@YA?AVdevice@12@PEBXAEBVcontext@12@@Z +?get_pointer_type@_V1@sycl@@YA?AW4alloc@usm@12@PEBXAEBVcontext@12@@Z +?get_precision@stream@_V1@sycl@@QEBA_KXZ +?get_predecessors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ +?get_range@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$02@34@XZ +?get_range@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$range@$02@56@XZ +?get_range@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$02@34@XZ +?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +?get_size@image_plain@detail@_V1@sycl@@IEBA_KXZ +?get_size@stream@_V1@sycl@@QEBA_KXZ +?get_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ +?get_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBAXPEBDPEAX@Z +?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ +?get_successors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +?get_type@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4image_type@23456@XZ +?get_type@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4node_type@23456@XZ +?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ +?get_width@stream@_V1@sycl@@QEBA_KXZ +?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ ?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z ?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXPEAX@Z@std@@_K_N@Z @@ -1493,13 +1494,13 @@ ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setPitches@image_impl@detail@_V1@sycl@@AEAAXAEBV?$range@$01@34@@Z ?setPitches@image_impl@detail@_V1@sycl@@AEAAXXZ -?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ -?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ -?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z -?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z -?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAX$$T@Z -?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z -?set_final_data_from_storage@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ +?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ +?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ +?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z +?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z +?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAX$$T@Z +?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z +?set_final_data_from_storage@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXXZ ?set_final_data_internal@image_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z From 584a6f94dbdaf8b1507030f47edde52e1064dc60 Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Sun, 18 Feb 2024 15:22:16 -0800 Subject: [PATCH 04/11] =?UTF-8?q?Fix=20'error:=20expression=20=E2=80=98=E2=80=99=20is=20not=20a=20constant=20expression'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../experimental/device_architecture.hpp | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 6e76be4dad8c6..401de7cc0666d 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -886,7 +886,7 @@ template class if_architecture_helper { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) { + ext::oneapi::experimental::architecture b) constexpr { return a < b; })) { fn(); @@ -907,7 +907,7 @@ template class if_architecture_helper { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) { + ext::oneapi::experimental::architecture b) constexpr { return a <= b; })) { fn(); @@ -927,7 +927,7 @@ template class if_architecture_helper { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) { + ext::oneapi::experimental::architecture b) constexpr { return a > b; })) { fn(); @@ -948,7 +948,7 @@ template class if_architecture_helper { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) { + ext::oneapi::experimental::architecture b) constexpr { return a >= b; })) { fn(); @@ -970,12 +970,12 @@ template class if_architecture_helper { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) { + ext::oneapi::experimental::architecture b) constexpr { return a >= b; }) && sycl::detail::device_architecture_comparison_aot( [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) { + ext::oneapi::experimental::architecture b) constexpr { return a <= b; })) { fn(); @@ -1035,7 +1035,9 @@ constexpr static auto if_architecture_is(T fn) { template constexpr static auto if_architecture_is_lt(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) { return a < b; })) { + [](architecture a, architecture b) constexpr { + return a < b; + })) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1050,7 +1052,9 @@ constexpr static auto if_architecture_is_lt(T fn) { template constexpr static auto if_architecture_is_le(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) { return a <= b; })) { + [](architecture a, architecture b) constexpr { + return a <= b; + })) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1065,7 +1069,9 @@ constexpr static auto if_architecture_is_le(T fn) { template constexpr static auto if_architecture_is_gt(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) { return a > b; })) { + [](architecture a, architecture b) constexpr { + return a > b; + })) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1080,7 +1086,9 @@ constexpr static auto if_architecture_is_gt(T fn) { template constexpr static auto if_architecture_is_ge(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) { return a >= b; })) { + [](architecture a, architecture b) constexpr { + return a >= b; + })) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1096,9 +1104,13 @@ constexpr static auto if_architecture_is_ge(T fn) { template constexpr static auto if_architecture_is_between(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) { return a >= b; }) && + [](architecture a, architecture b) constexpr { + return a >= b; + }) && sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) { return a <= b; })) { + [](architecture a, architecture b) constexpr { + return a <= b; + })) { fn(); return sycl::detail::if_architecture_helper{}; } else { From a0862a671da83237bd654ac92e8da62372c8b394 Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Sun, 18 Feb 2024 15:26:44 -0800 Subject: [PATCH 05/11] Remove unused header include --- .../include/sycl/ext/oneapi/experimental/device_architecture.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 401de7cc0666d..719ddf8180cca 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -9,7 +9,6 @@ #pragma once #include -#include namespace sycl { inline namespace _V1 { From ac7bfdad27924d3545181d82545ca227250a5bb2 Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Mon, 19 Feb 2024 00:46:14 -0800 Subject: [PATCH 06/11] =?UTF-8?q?Try=20to=20fix=20'error:=20expression=20?= =?UTF-8?q?=E2=80=98=E2=80=99=20is=20not=20a=20constant=20expressi?= =?UTF-8?q?on'=20again?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sycl/ext/oneapi/experimental/device_architecture.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 719ddf8180cca..ba13dd9a7ddf1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -1033,10 +1033,11 @@ constexpr static auto if_architecture_is(T fn) { /// family as @tparam Arch and compares less than @tparam Arch. template constexpr static auto if_architecture_is_lt(T fn) { + constexpr auto compare_op_lt = [](architecture a, architecture b) constexpr { + return a < b; + }; if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) constexpr { - return a < b; - })) { + compare_op_lt)) { fn(); return sycl::detail::if_architecture_helper{}; } else { From 7d6d225264b46e3abe9cccb0025b19272d1180ff Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Mon, 19 Feb 2024 02:31:01 -0800 Subject: [PATCH 07/11] Try to fix again --- .../experimental/device_architecture.hpp | 68 +++++++------------ 1 file changed, 25 insertions(+), 43 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index ba13dd9a7ddf1..9b326c66f5e9c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -837,6 +837,19 @@ constexpr static bool device_architecture_comparison_aot(Compare comp) { return false; } +constexpr auto device_arch_compare_op_lt = + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) constexpr { return a < b; }; +constexpr auto device_arch_compare_op_le = + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) constexpr { return a <= b; }; +constexpr auto device_arch_compare_op_gt = + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) constexpr { return a > b; }; +constexpr auto device_arch_compare_op_ge = + [](ext::oneapi::experimental::architecture a, + ext::oneapi::experimental::architecture b) constexpr { return a >= b; }; + // Helper object used to implement "else_if_architecture_is", // "else_if_architecture_is_*" and "otherwise". The "MakeCall" template // parameter tells whether a previous clause in the "if-elseif-elseif ..." chain @@ -884,10 +897,7 @@ template class if_architecture_helper { constexpr auto else_if_architecture_is_lt(T fn) { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( - [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) constexpr { - return a < b; - })) { + device_arch_compare_op_lt)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -905,10 +915,7 @@ template class if_architecture_helper { constexpr auto else_if_architecture_is_le(T fn) { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( - [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) constexpr { - return a <= b; - })) { + device_arch_compare_op_le)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -925,10 +932,7 @@ template class if_architecture_helper { constexpr auto else_if_architecture_is_gt(T fn) { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( - [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) constexpr { - return a > b; - })) { + device_arch_compare_op_gt)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -946,10 +950,7 @@ template class if_architecture_helper { constexpr auto else_if_architecture_is_ge(T fn) { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( - [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) constexpr { - return a >= b; - })) { + device_arch_compare_op_ge)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -968,15 +969,9 @@ template class if_architecture_helper { constexpr auto else_if_architecture_is_between(T fn) { if constexpr (MakeCall && sycl::detail::device_architecture_comparison_aot( - [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) constexpr { - return a >= b; - }) && + device_arch_compare_op_ge) && sycl::detail::device_architecture_comparison_aot( - [](ext::oneapi::experimental::architecture a, - ext::oneapi::experimental::architecture b) constexpr { - return a <= b; - })) { + device_arch_compare_op_le)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1033,11 +1028,8 @@ constexpr static auto if_architecture_is(T fn) { /// family as @tparam Arch and compares less than @tparam Arch. template constexpr static auto if_architecture_is_lt(T fn) { - constexpr auto compare_op_lt = [](architecture a, architecture b) constexpr { - return a < b; - }; if constexpr (sycl::detail::device_architecture_comparison_aot( - compare_op_lt)) { + sycl::detail::device_arch_compare_op_lt)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1052,9 +1044,7 @@ constexpr static auto if_architecture_is_lt(T fn) { template constexpr static auto if_architecture_is_le(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) constexpr { - return a <= b; - })) { + sycl::detail::device_arch_compare_op_le)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1069,9 +1059,7 @@ constexpr static auto if_architecture_is_le(T fn) { template constexpr static auto if_architecture_is_gt(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) constexpr { - return a > b; - })) { + sycl::detail::device_arch_compare_op_gt)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1086,9 +1074,7 @@ constexpr static auto if_architecture_is_gt(T fn) { template constexpr static auto if_architecture_is_ge(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) constexpr { - return a >= b; - })) { + sycl::detail::device_arch_compare_op_ge)) { fn(); return sycl::detail::if_architecture_helper{}; } else { @@ -1104,13 +1090,9 @@ constexpr static auto if_architecture_is_ge(T fn) { template constexpr static auto if_architecture_is_between(T fn) { if constexpr (sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) constexpr { - return a >= b; - }) && + sycl::detail::device_arch_compare_op_ge) && sycl::detail::device_architecture_comparison_aot( - [](architecture a, architecture b) constexpr { - return a <= b; - })) { + sycl::detail::device_arch_compare_op_le)) { fn(); return sycl::detail::if_architecture_helper{}; } else { From efef517fdbb880172e9e2fc4657c3be3a7acf7db Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Mon, 19 Feb 2024 05:01:31 -0800 Subject: [PATCH 08/11] Fix typo in comment --- sycl/include/sycl/device.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 2b65e077bd08e..763304f84c3cb 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -267,7 +267,7 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { /// the function. bool ext_oneapi_architecture_is(ext::oneapi::experimental::architecture arch); - /// Indicates if the SYCL device architecture equals is in the category passed + /// Indicates if the SYCL device architecture is in the category passed /// to the function. /// /// \param category is one of the architecture categories from arch_category From 5a8efb0ca2b8a44dd4bace4d3163415949bda28f Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Wed, 28 Feb 2024 06:37:11 -0800 Subject: [PATCH 09/11] Improve uniqueness of arch ID --- .../experimental/device_architecture.hpp | 214 ++++++++++-------- 1 file changed, 121 insertions(+), 93 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 9b326c66f5e9c..69b0afc60e704 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -8,13 +8,14 @@ #pragma once +#include // for uint64_t #include namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { -enum class architecture { +enum class architecture : uint64_t { // If new element is added to this enum: // // Update @@ -28,116 +29,131 @@ enum class architecture { // - the unique ID of the new architecture in SYCL RT source code to support // querying the device architecture // - x86_64 = 0x00000000, + // Important note about keeping architecture IDs below unique: + // - the architecture ID must be a hex number with 16 digits + // - the architecture ID must suit the following template: + // 0x AAAA BB CCCCCCCC DD (without spaces), where + // - AAAA is 4-digit PCI vendor ID of that architecture + // - BB is 2-digit unique number of vendor's accelerator. It is 00 by + // default. E.g., for Intel, the PCI vendor ID is the same for all + // accelerators: for CPU and for GPU. In this case BB can be equal to + // 00 for CPU and 01 for GPU + // - AAAABB number must be unique to the only one category from + // arch_category enum below. Two or more categories cannot share the + // same AAAABB number + // - CCCCCCCC is 8-digit number of architecture itself. It must be + // unique for all architectures inside the category + // - DD is 2-digit number reserved for future unexpected modifications + // to keep uniqueness. It should be always 00 for now + // + x86_64 = 0x0000000000000000, // // Intel CPU architectures // - // The requirement for the unique ID for intel_cpu_* architectures below is: - // - the ID must start with 0x0 (to avoid the integer overflow) - // - then goes Intel's vendor ID from underlied backend (which is 8086) - // - the ID ends with the architecture ID from the DEVICE_IP_VERSION extension - // of underlied backend - intel_cpu_spr = 0x08086008, - intel_cpu_gnr = 0x08086009, + // AAAA is 8086, BB is 00, + // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of + // underlied backend, + // DD is 00 + intel_cpu_spr = 0x8086000000000800, + intel_cpu_gnr = 0x8086000000000900, // // Intel GPU architectures // - // The requirement for the unique ID for intel_gpu_* architectures below is: - // - the ID is GMDID of that architecture - intel_gpu_bdw = 0x02000000, - intel_gpu_skl = 0x02400009, - intel_gpu_kbl = 0x02404009, - intel_gpu_cfl = 0x02408009, - intel_gpu_apl = 0x0240c000, + // AAAA is 8086, BB is 01, + // CCCCCCCC is GMDID of that architecture, + // DD is 00 + intel_gpu_bdw = 0x8086010200000000, + intel_gpu_skl = 0x8086010240000900, + intel_gpu_kbl = 0x8086010240400900, + intel_gpu_cfl = 0x8086010240800900, + intel_gpu_apl = 0x8086010240c00000, intel_gpu_bxt = intel_gpu_apl, - intel_gpu_glk = 0x02410000, - intel_gpu_whl = 0x02414000, - intel_gpu_aml = 0x02418000, - intel_gpu_cml = 0x0241c000, - intel_gpu_icllp = 0x02c00000, - intel_gpu_ehl = 0x02c08000, + intel_gpu_glk = 0x8086010241000000, + intel_gpu_whl = 0x8086010241400000, + intel_gpu_aml = 0x8086010241800000, + intel_gpu_cml = 0x8086010241c00000, + intel_gpu_icllp = 0x80860102c0000000, + intel_gpu_ehl = 0x80860102c0800000, intel_gpu_jsl = intel_gpu_ehl, - intel_gpu_tgllp = 0x03000000, - intel_gpu_rkl = 0x03004000, - intel_gpu_adl_s = 0x03008000, + intel_gpu_tgllp = 0x8086010300000000, + intel_gpu_rkl = 0x8086010300400000, + intel_gpu_adl_s = 0x8086010300800000, intel_gpu_rpl_s = intel_gpu_adl_s, - intel_gpu_adl_p = 0x0300c000, - intel_gpu_adl_n = 0x03010000, - intel_gpu_dg1 = 0x03028000, - intel_gpu_acm_g10 = 0x030dc008, + intel_gpu_adl_p = 0x8086010300c00000, + intel_gpu_adl_n = 0x8086010301000000, + intel_gpu_dg1 = 0x8086010302800000, + intel_gpu_acm_g10 = 0x808601030dc00800, intel_gpu_dg2_g10 = intel_gpu_acm_g10, - intel_gpu_acm_g11 = 0x030e0005, + intel_gpu_acm_g11 = 0x808601030e000500, intel_gpu_dg2_g11 = intel_gpu_acm_g11, - intel_gpu_acm_g12 = 0x030e4000, + intel_gpu_acm_g12 = 0x808601030e400000, intel_gpu_dg2_g12 = intel_gpu_acm_g12, - intel_gpu_pvc = 0x030f0007, - intel_gpu_pvc_vg = 0x030f4007, + intel_gpu_pvc = 0x808601030f000700, + intel_gpu_pvc_vg = 0x808601030f400700, // // NVIDIA architectures // - // The requirement for the unique ID for nvidia_gpu_* architectures below is: - // - the ID must start with NVIDIA's vendor ID from underlied backend (which - // is 0x10de) - // - the ID must end with SM version ID of that architecture - nvidia_gpu_sm_50 = 0x10de0050, - nvidia_gpu_sm_52 = 0x10de0052, - nvidia_gpu_sm_53 = 0x10de0053, - nvidia_gpu_sm_60 = 0x10de0060, - nvidia_gpu_sm_61 = 0x10de0061, - nvidia_gpu_sm_62 = 0x10de0062, - nvidia_gpu_sm_70 = 0x10de0070, - nvidia_gpu_sm_72 = 0x10de0072, - nvidia_gpu_sm_75 = 0x10de0075, - nvidia_gpu_sm_80 = 0x10de0080, - nvidia_gpu_sm_86 = 0x10de0086, - nvidia_gpu_sm_87 = 0x10de0087, - nvidia_gpu_sm_89 = 0x10de0089, - nvidia_gpu_sm_90 = 0x10de0090, + // AAAA is 10de, BB is 00, + // CCCCCCCC is the SM version ID of that architecture, + // DD is 00 + nvidia_gpu_sm_50 = 0x10de000000005000, + nvidia_gpu_sm_52 = 0x10de000000005200, + nvidia_gpu_sm_53 = 0x10de000000005300, + nvidia_gpu_sm_60 = 0x10de000000006000, + nvidia_gpu_sm_61 = 0x10de000000006100, + nvidia_gpu_sm_62 = 0x10de000000006200, + nvidia_gpu_sm_70 = 0x10de000000007000, + nvidia_gpu_sm_72 = 0x10de000000007200, + nvidia_gpu_sm_75 = 0x10de000000007500, + nvidia_gpu_sm_80 = 0x10de000000008000, + nvidia_gpu_sm_86 = 0x10de000000008600, + nvidia_gpu_sm_87 = 0x10de000000008700, + nvidia_gpu_sm_89 = 0x10de000000008900, + nvidia_gpu_sm_90 = 0x10de000000009000, // // AMD architectures // - // The requirement for the unique ID for amd_gpu_* architectures below is: - // - the ID must start with AMD's vendor ID from underlied backend (which is - // 0x1002) - // - the ID must end with GFX version ID of that architecture - amd_gpu_gfx700 = 0x10020700, - amd_gpu_gfx701 = 0x10020701, - amd_gpu_gfx702 = 0x10020702, - amd_gpu_gfx801 = 0x10020801, - amd_gpu_gfx802 = 0x10020802, - amd_gpu_gfx803 = 0x10020803, - amd_gpu_gfx805 = 0x10020805, - amd_gpu_gfx810 = 0x10020810, - amd_gpu_gfx900 = 0x10020900, - amd_gpu_gfx902 = 0x10020902, - amd_gpu_gfx904 = 0x10020904, - amd_gpu_gfx906 = 0x10020906, - amd_gpu_gfx908 = 0x10020908, - amd_gpu_gfx909 = 0x10020909, - amd_gpu_gfx90a = 0x1002090a, - amd_gpu_gfx90c = 0x1002090c, - amd_gpu_gfx940 = 0x10020940, - amd_gpu_gfx941 = 0x10020941, - amd_gpu_gfx942 = 0x10020942, - amd_gpu_gfx1010 = 0x10021010, - amd_gpu_gfx1011 = 0x10021011, - amd_gpu_gfx1012 = 0x10021012, - amd_gpu_gfx1013 = 0x10021013, - amd_gpu_gfx1030 = 0x10021030, - amd_gpu_gfx1031 = 0x10021031, - amd_gpu_gfx1032 = 0x10021032, - amd_gpu_gfx1033 = 0x10021033, - amd_gpu_gfx1034 = 0x10021034, - amd_gpu_gfx1035 = 0x10021035, - amd_gpu_gfx1036 = 0x10021036, - amd_gpu_gfx1100 = 0x10021100, - amd_gpu_gfx1101 = 0x10021101, - amd_gpu_gfx1102 = 0x10021102, - amd_gpu_gfx1103 = 0x10021103, - amd_gpu_gfx1150 = 0x10021150, - amd_gpu_gfx1151 = 0x10021151, - amd_gpu_gfx1200 = 0x10021200, - amd_gpu_gfx1201 = 0x10021201, + // AAAA is 1002, BB is 00, + // CCCCCCCC is the GFX version ID of that architecture, + // DD is 00 + amd_gpu_gfx700 = 0x1002000000070000, + amd_gpu_gfx701 = 0x1002000000070100, + amd_gpu_gfx702 = 0x1002000000070200, + amd_gpu_gfx801 = 0x1002000000080100, + amd_gpu_gfx802 = 0x1002000000080200, + amd_gpu_gfx803 = 0x1002000000080300, + amd_gpu_gfx805 = 0x1002000000080500, + amd_gpu_gfx810 = 0x1002000000081000, + amd_gpu_gfx900 = 0x1002000000090000, + amd_gpu_gfx902 = 0x1002000000090200, + amd_gpu_gfx904 = 0x1002000000090400, + amd_gpu_gfx906 = 0x1002000000090600, + amd_gpu_gfx908 = 0x1002000000090800, + amd_gpu_gfx909 = 0x1002000000090900, + amd_gpu_gfx90a = 0x1002000000090a00, + amd_gpu_gfx90c = 0x1002000000090c00, + amd_gpu_gfx940 = 0x1002000000094000, + amd_gpu_gfx941 = 0x1002000000094100, + amd_gpu_gfx942 = 0x1002000000094200, + amd_gpu_gfx1010 = 0x1002000000101000, + amd_gpu_gfx1011 = 0x1002000000101100, + amd_gpu_gfx1012 = 0x1002000000101200, + amd_gpu_gfx1013 = 0x1002000000101300, + amd_gpu_gfx1030 = 0x1002000000103000, + amd_gpu_gfx1031 = 0x1002000000103100, + amd_gpu_gfx1032 = 0x1002000000103200, + amd_gpu_gfx1033 = 0x1002000000103300, + amd_gpu_gfx1034 = 0x1002000000103400, + amd_gpu_gfx1035 = 0x1002000000103500, + amd_gpu_gfx1036 = 0x1002000000103600, + amd_gpu_gfx1100 = 0x1002000000110000, + amd_gpu_gfx1101 = 0x1002000000110100, + amd_gpu_gfx1102 = 0x1002000000110200, + amd_gpu_gfx1103 = 0x1002000000110300, + amd_gpu_gfx1150 = 0x1002000000115000, + amd_gpu_gfx1151 = 0x1002000000115100, + amd_gpu_gfx1200 = 0x1002000000120000, + amd_gpu_gfx1201 = 0x1002000000120100, intel_gpu_8_0_0 = intel_gpu_bdw, intel_gpu_9_0_9 = intel_gpu_skl, intel_gpu_9_1_9 = intel_gpu_kbl, @@ -153,6 +169,18 @@ enum class architecture { }; enum class arch_category { + // If new element is added to this enum: + // + // Add + // - "detail::min__architecture" variable below + // - "detail::max__architecture" variable below + // + // Update + // - "detail::get_category_min_architecture()" function below + // - "detail::get_category_max_architecture()" function below + // - "detail::get_device_architecture_category()" function below + // - sycl_ext_oneapi_device_architecture specification doc + // intel_gpu = 0, nvidia_gpu = 1, amd_gpu = 2, From bf270fcaeae5fe406642690353f510d155c60c05 Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Thu, 29 Feb 2024 05:44:44 -0800 Subject: [PATCH 10/11] Simplify architecture IDs --- .../experimental/device_architecture.hpp | 196 +++++++++--------- 1 file changed, 95 insertions(+), 101 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 69b0afc60e704..4b99e67e3fea3 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -32,128 +32,122 @@ enum class architecture : uint64_t { // Important note about keeping architecture IDs below unique: // - the architecture ID must be a hex number with 16 digits // - the architecture ID must suit the following template: - // 0x AAAA BB CCCCCCCC DD (without spaces), where - // - AAAA is 4-digit PCI vendor ID of that architecture - // - BB is 2-digit unique number of vendor's accelerator. It is 00 by - // default. E.g., for Intel, the PCI vendor ID is the same for all - // accelerators: for CPU and for GPU. In this case BB can be equal to - // 00 for CPU and 01 for GPU - // - AAAABB number must be unique to the only one category from - // arch_category enum below. Two or more categories cannot share the - // same AAAABB number - // - CCCCCCCC is 8-digit number of architecture itself. It must be + // 0x AA BBBBBBBBBBBB CC (without spaces), where + // - AA is 2-digit ID of category which must be unique + // - BBBBBBBBBBBB is 12-digit number of architecture itself. It must be // unique for all architectures inside the category - // - DD is 2-digit number reserved for future unexpected modifications + // - CC is 2-digit number reserved for future unexpected modifications // to keep uniqueness. It should be always 00 for now // - x86_64 = 0x0000000000000000, + x86_64 = 0x9900000000000000, // // Intel CPU architectures // - // AAAA is 8086, BB is 00, - // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of + // AAAA is 03, + // BBBBBBBBBBBB is the architecture ID from the DEVICE_IP_VERSION extension of // underlied backend, - // DD is 00 - intel_cpu_spr = 0x8086000000000800, - intel_cpu_gnr = 0x8086000000000900, + // CC is 00 + intel_cpu_spr = 0x0300000000000800, + intel_cpu_gnr = 0x0300000000000900, // // Intel GPU architectures // - // AAAA is 8086, BB is 01, - // CCCCCCCC is GMDID of that architecture, - // DD is 00 - intel_gpu_bdw = 0x8086010200000000, - intel_gpu_skl = 0x8086010240000900, - intel_gpu_kbl = 0x8086010240400900, - intel_gpu_cfl = 0x8086010240800900, - intel_gpu_apl = 0x8086010240c00000, + // AA is 00, + // BBBBBBBBBBBB is GMDID of that architecture (4 zeros + 8-digit GMDID + // itself), + // CC is 00 + intel_gpu_bdw = 0x0000000200000000, + intel_gpu_skl = 0x0000000240000900, + intel_gpu_kbl = 0x0000000240400900, + intel_gpu_cfl = 0x0000000240800900, + intel_gpu_apl = 0x0000000240c00000, intel_gpu_bxt = intel_gpu_apl, - intel_gpu_glk = 0x8086010241000000, - intel_gpu_whl = 0x8086010241400000, - intel_gpu_aml = 0x8086010241800000, - intel_gpu_cml = 0x8086010241c00000, - intel_gpu_icllp = 0x80860102c0000000, - intel_gpu_ehl = 0x80860102c0800000, + intel_gpu_glk = 0x0000000241000000, + intel_gpu_whl = 0x0000000241400000, + intel_gpu_aml = 0x0000000241800000, + intel_gpu_cml = 0x0000000241c00000, + intel_gpu_icllp = 0x00000002c0000000, + intel_gpu_ehl = 0x00000002c0800000, intel_gpu_jsl = intel_gpu_ehl, - intel_gpu_tgllp = 0x8086010300000000, - intel_gpu_rkl = 0x8086010300400000, - intel_gpu_adl_s = 0x8086010300800000, + intel_gpu_tgllp = 0x0000000300000000, + intel_gpu_rkl = 0x0000000300400000, + intel_gpu_adl_s = 0x0000000300800000, intel_gpu_rpl_s = intel_gpu_adl_s, - intel_gpu_adl_p = 0x8086010300c00000, - intel_gpu_adl_n = 0x8086010301000000, - intel_gpu_dg1 = 0x8086010302800000, - intel_gpu_acm_g10 = 0x808601030dc00800, + intel_gpu_adl_p = 0x0000000300c00000, + intel_gpu_adl_n = 0x0000000301000000, + intel_gpu_dg1 = 0x0000000302800000, + intel_gpu_acm_g10 = 0x000000030dc00800, intel_gpu_dg2_g10 = intel_gpu_acm_g10, - intel_gpu_acm_g11 = 0x808601030e000500, + intel_gpu_acm_g11 = 0x000000030e000500, intel_gpu_dg2_g11 = intel_gpu_acm_g11, - intel_gpu_acm_g12 = 0x808601030e400000, + intel_gpu_acm_g12 = 0x000000030e400000, intel_gpu_dg2_g12 = intel_gpu_acm_g12, - intel_gpu_pvc = 0x808601030f000700, - intel_gpu_pvc_vg = 0x808601030f400700, + intel_gpu_pvc = 0x000000030f000700, + intel_gpu_pvc_vg = 0x000000030f400700, // // NVIDIA architectures // - // AAAA is 10de, BB is 00, - // CCCCCCCC is the SM version ID of that architecture, - // DD is 00 - nvidia_gpu_sm_50 = 0x10de000000005000, - nvidia_gpu_sm_52 = 0x10de000000005200, - nvidia_gpu_sm_53 = 0x10de000000005300, - nvidia_gpu_sm_60 = 0x10de000000006000, - nvidia_gpu_sm_61 = 0x10de000000006100, - nvidia_gpu_sm_62 = 0x10de000000006200, - nvidia_gpu_sm_70 = 0x10de000000007000, - nvidia_gpu_sm_72 = 0x10de000000007200, - nvidia_gpu_sm_75 = 0x10de000000007500, - nvidia_gpu_sm_80 = 0x10de000000008000, - nvidia_gpu_sm_86 = 0x10de000000008600, - nvidia_gpu_sm_87 = 0x10de000000008700, - nvidia_gpu_sm_89 = 0x10de000000008900, - nvidia_gpu_sm_90 = 0x10de000000009000, + // AA is 01, + // BBBBBBBBBBBB is the SM version ID of that architecture, + // CC is 00 + nvidia_gpu_sm_50 = 0x0100000000005000, + nvidia_gpu_sm_52 = 0x0100000000005200, + nvidia_gpu_sm_53 = 0x0100000000005300, + nvidia_gpu_sm_60 = 0x0100000000006000, + nvidia_gpu_sm_61 = 0x0100000000006100, + nvidia_gpu_sm_62 = 0x0100000000006200, + nvidia_gpu_sm_70 = 0x0100000000007000, + nvidia_gpu_sm_72 = 0x0100000000007200, + nvidia_gpu_sm_75 = 0x0100000000007500, + nvidia_gpu_sm_80 = 0x0100000000008000, + nvidia_gpu_sm_86 = 0x0100000000008600, + nvidia_gpu_sm_87 = 0x0100000000008700, + nvidia_gpu_sm_89 = 0x0100000000008900, + nvidia_gpu_sm_90 = 0x0100000000009000, // // AMD architectures // - // AAAA is 1002, BB is 00, - // CCCCCCCC is the GFX version ID of that architecture, - // DD is 00 - amd_gpu_gfx700 = 0x1002000000070000, - amd_gpu_gfx701 = 0x1002000000070100, - amd_gpu_gfx702 = 0x1002000000070200, - amd_gpu_gfx801 = 0x1002000000080100, - amd_gpu_gfx802 = 0x1002000000080200, - amd_gpu_gfx803 = 0x1002000000080300, - amd_gpu_gfx805 = 0x1002000000080500, - amd_gpu_gfx810 = 0x1002000000081000, - amd_gpu_gfx900 = 0x1002000000090000, - amd_gpu_gfx902 = 0x1002000000090200, - amd_gpu_gfx904 = 0x1002000000090400, - amd_gpu_gfx906 = 0x1002000000090600, - amd_gpu_gfx908 = 0x1002000000090800, - amd_gpu_gfx909 = 0x1002000000090900, - amd_gpu_gfx90a = 0x1002000000090a00, - amd_gpu_gfx90c = 0x1002000000090c00, - amd_gpu_gfx940 = 0x1002000000094000, - amd_gpu_gfx941 = 0x1002000000094100, - amd_gpu_gfx942 = 0x1002000000094200, - amd_gpu_gfx1010 = 0x1002000000101000, - amd_gpu_gfx1011 = 0x1002000000101100, - amd_gpu_gfx1012 = 0x1002000000101200, - amd_gpu_gfx1013 = 0x1002000000101300, - amd_gpu_gfx1030 = 0x1002000000103000, - amd_gpu_gfx1031 = 0x1002000000103100, - amd_gpu_gfx1032 = 0x1002000000103200, - amd_gpu_gfx1033 = 0x1002000000103300, - amd_gpu_gfx1034 = 0x1002000000103400, - amd_gpu_gfx1035 = 0x1002000000103500, - amd_gpu_gfx1036 = 0x1002000000103600, - amd_gpu_gfx1100 = 0x1002000000110000, - amd_gpu_gfx1101 = 0x1002000000110100, - amd_gpu_gfx1102 = 0x1002000000110200, - amd_gpu_gfx1103 = 0x1002000000110300, - amd_gpu_gfx1150 = 0x1002000000115000, - amd_gpu_gfx1151 = 0x1002000000115100, - amd_gpu_gfx1200 = 0x1002000000120000, - amd_gpu_gfx1201 = 0x1002000000120100, + // AA is 02, + // BBBBBBBBBBBB is the GFX version ID of that architecture, + // CC is 00 + amd_gpu_gfx700 = 0x0200000000070000, + amd_gpu_gfx701 = 0x0200000000070100, + amd_gpu_gfx702 = 0x0200000000070200, + amd_gpu_gfx801 = 0x0200000000080100, + amd_gpu_gfx802 = 0x0200000000080200, + amd_gpu_gfx803 = 0x0200000000080300, + amd_gpu_gfx805 = 0x0200000000080500, + amd_gpu_gfx810 = 0x0200000000081000, + amd_gpu_gfx900 = 0x0200000000090000, + amd_gpu_gfx902 = 0x0200000000090200, + amd_gpu_gfx904 = 0x0200000000090400, + amd_gpu_gfx906 = 0x0200000000090600, + amd_gpu_gfx908 = 0x0200000000090800, + amd_gpu_gfx909 = 0x0200000000090900, + amd_gpu_gfx90a = 0x0200000000090a00, + amd_gpu_gfx90c = 0x0200000000090c00, + amd_gpu_gfx940 = 0x0200000000094000, + amd_gpu_gfx941 = 0x0200000000094100, + amd_gpu_gfx942 = 0x0200000000094200, + amd_gpu_gfx1010 = 0x0200000000101000, + amd_gpu_gfx1011 = 0x0200000000101100, + amd_gpu_gfx1012 = 0x0200000000101200, + amd_gpu_gfx1013 = 0x0200000000101300, + amd_gpu_gfx1030 = 0x0200000000103000, + amd_gpu_gfx1031 = 0x0200000000103100, + amd_gpu_gfx1032 = 0x0200000000103200, + amd_gpu_gfx1033 = 0x0200000000103300, + amd_gpu_gfx1034 = 0x0200000000103400, + amd_gpu_gfx1035 = 0x0200000000103500, + amd_gpu_gfx1036 = 0x0200000000103600, + amd_gpu_gfx1100 = 0x0200000000110000, + amd_gpu_gfx1101 = 0x0200000000110100, + amd_gpu_gfx1102 = 0x0200000000110200, + amd_gpu_gfx1103 = 0x0200000000110300, + amd_gpu_gfx1150 = 0x0200000000115000, + amd_gpu_gfx1151 = 0x0200000000115100, + amd_gpu_gfx1200 = 0x0200000000120000, + amd_gpu_gfx1201 = 0x0200000000120100, intel_gpu_8_0_0 = intel_gpu_bdw, intel_gpu_9_0_9 = intel_gpu_skl, intel_gpu_9_1_9 = intel_gpu_kbl, @@ -184,7 +178,7 @@ enum class arch_category { intel_gpu = 0, nvidia_gpu = 1, amd_gpu = 2, - // TODO: add intel_cpu + // TODO: add intel_cpu = 3, }; } // namespace ext::oneapi::experimental From 28a152fa60bc85ccf8e3237fcb0af422deb30352 Mon Sep 17 00:00:00 2001 From: Dmitry Vodoypanov Date: Tue, 5 Mar 2024 04:36:43 -0800 Subject: [PATCH 11/11] Apply CR changes --- .../experimental/device_architecture.hpp | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 4b99e67e3fea3..fd38003944418 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -32,30 +32,29 @@ enum class architecture : uint64_t { // Important note about keeping architecture IDs below unique: // - the architecture ID must be a hex number with 16 digits // - the architecture ID must suit the following template: - // 0x AA BBBBBBBBBBBB CC (without spaces), where - // - AA is 2-digit ID of category which must be unique - // - BBBBBBBBBBBB is 12-digit number of architecture itself. It must be - // unique for all architectures inside the category - // - CC is 2-digit number reserved for future unexpected modifications + // 0x AA BBBB CCCCCCCC DD (without spaces), where + // - AA is 2-digit ID of the architecture family which must be unique + // - BBBB is 4-digit number reserved for future modifications + // to keep uniqueness. It should be always 0000 for now + // - CCCCCCCC is 8-digit number of architecture itself. It must be + // unique for all architectures inside the family + // - DD is 2-digit number reserved for future unexpected modifications // to keep uniqueness. It should be always 00 for now // x86_64 = 0x9900000000000000, // // Intel CPU architectures // - // AAAA is 03, - // BBBBBBBBBBBB is the architecture ID from the DEVICE_IP_VERSION extension of - // underlied backend, - // CC is 00 + // AA is 03, + // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of + // underlied backend intel_cpu_spr = 0x0300000000000800, intel_cpu_gnr = 0x0300000000000900, // // Intel GPU architectures // // AA is 00, - // BBBBBBBBBBBB is GMDID of that architecture (4 zeros + 8-digit GMDID - // itself), - // CC is 00 + // CCCCCCCC is GMDID of that architecture intel_gpu_bdw = 0x0000000200000000, intel_gpu_skl = 0x0000000240000900, intel_gpu_kbl = 0x0000000240400900, @@ -88,8 +87,7 @@ enum class architecture : uint64_t { // NVIDIA architectures // // AA is 01, - // BBBBBBBBBBBB is the SM version ID of that architecture, - // CC is 00 + // CCCCCCCC is the SM version ID of that architecture nvidia_gpu_sm_50 = 0x0100000000005000, nvidia_gpu_sm_52 = 0x0100000000005200, nvidia_gpu_sm_53 = 0x0100000000005300, @@ -108,8 +106,7 @@ enum class architecture : uint64_t { // AMD architectures // // AA is 02, - // BBBBBBBBBBBB is the GFX version ID of that architecture, - // CC is 00 + // CCCCCCCC is the GFX version ID of that architecture amd_gpu_gfx700 = 0x0200000000070000, amd_gpu_gfx701 = 0x0200000000070100, amd_gpu_gfx702 = 0x0200000000070200,