From 2c4f5f7c5c1f3d7fcb3bf4b428d85fa3afa8ee18 Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Sat, 17 Feb 2024 10:17:07 -0800
Subject: [PATCH 01/11] [SYCL] Implement device arch comparison according to
 sycl_ext_oneapi_device_architecture spec update

This patch implements device AOT and host APIs for device architecture
comparison in accordance to [spec update](https://github.com/intel/llvm/pull/12259)
---
 sycl/include/sycl/device.hpp                  |  11 +
 .../experimental/device_architecture.hpp      | 957 +++++++++++++-----
 sycl/source/detail/device_impl.hpp            |  13 +
 sycl/source/device.cpp                        |   6 +
 ..._architecture_comparison_on_device_aot.cpp | 199 ++++
 ...device_architecture_comparison_on_host.cpp |  42 +
 6 files changed, 975 insertions(+), 253 deletions(-)
 create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
 create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp
diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp
index 03b5a8f10ca33..2b65e077bd08e 100644
--- a/sycl/include/sycl/device.hpp
+++ b/sycl/include/sycl/device.hpp
@@ -267,6 +267,17 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase<device> {
   /// the function.
   bool ext_oneapi_architecture_is(ext::oneapi::experimental::architecture arch);
 
+  /// Indicates if the SYCL device architecture equals is in the category passed
+  /// to the function.
+  ///
+  /// \param category is one of the architecture categories from arch_category
+  /// enum described in sycl_ext_oneapi_device_architecture specification.
+  ///
+  /// \return true if the SYCL device architecture is in the category passed to
+  /// the function.
+  bool
+  ext_oneapi_architecture_is(ext::oneapi::experimental::arch_category category);
+
 // TODO: Remove this diagnostics when __SYCL_WARN_IMAGE_ASPECT is removed.
 #if defined(__clang__)
 #pragma clang diagnostic pop
diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index cbd76058825d9..dfc2de8665960 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -8,6 +8,9 @@
 
 #pragma once
 
+#include <optional>
+#include <unordered_map>
+
 namespace sycl {
 inline namespace _V1 {
 namespace ext::oneapi::experimental {
@@ -16,6 +19,8 @@ enum class architecture {
   // If new element is added to this enum:
   //
   // Update
+  //   - "detail::min_<category>_architecture" below if needed
+  //   - "detail::max_<category>_architecture" below if needed
   //   - sycl_ext_oneapi_device_architecture specification doc
   //   - "-fsycl-targets" description in sycl/doc/UsersManual.md
   //
@@ -24,92 +29,116 @@ enum class architecture {
   //   - the unique ID of the new architecture in SYCL RT source code to support
   //     querying the device architecture
   //
-  x86_64,
-  intel_cpu_spr,
-  intel_cpu_gnr,
-  intel_gpu_bdw,
-  intel_gpu_skl,
-  intel_gpu_kbl,
-  intel_gpu_cfl,
-  intel_gpu_apl,
+  x86_64 = 0x00000000,
+  //
+  // Intel CPU architectures
+  //
+  // The requirement for the unique ID for intel_cpu_* architectures below is:
+  // - the ID must start with 0x0 (to avoid the integer overflow)
+  // - then goes Intel's vendor ID from underlied backend (which is 8086)
+  // - the ID ends with the architecture ID from the DEVICE_IP_VERSION extension
+  //   of underlied backend
+  intel_cpu_spr = 0x08086008,
+  intel_cpu_gnr = 0x08086009,
+  //
+  // Intel GPU architectures
+  //
+  // The requirement for the unique ID for intel_gpu_* architectures below is:
+  // - the ID is GMDID of that architecture
+  intel_gpu_bdw = 0x02000000,
+  intel_gpu_skl = 0x02400009,
+  intel_gpu_kbl = 0x02404009,
+  intel_gpu_cfl = 0x02408009,
+  intel_gpu_apl = 0x0240c000,
   intel_gpu_bxt = intel_gpu_apl,
-  intel_gpu_glk,
-  intel_gpu_whl,
-  intel_gpu_aml,
-  intel_gpu_cml,
-  intel_gpu_icllp,
-  intel_gpu_ehl,
+  intel_gpu_glk = 0x02410000,
+  intel_gpu_whl = 0x02414000,
+  intel_gpu_aml = 0x02418000,
+  intel_gpu_cml = 0x0241c000,
+  intel_gpu_icllp = 0x02c00000,
+  intel_gpu_ehl = 0x02c08000,
   intel_gpu_jsl = intel_gpu_ehl,
-  intel_gpu_tgllp,
-  intel_gpu_rkl,
-  intel_gpu_adl_s,
+  intel_gpu_tgllp = 0x03000000,
+  intel_gpu_rkl = 0x03004000,
+  intel_gpu_adl_s = 0x03008000,
   intel_gpu_rpl_s = intel_gpu_adl_s,
-  intel_gpu_adl_p,
-  intel_gpu_adl_n,
-  intel_gpu_dg1,
-  intel_gpu_acm_g10,
+  intel_gpu_adl_p = 0x0300c000,
+  intel_gpu_adl_n = 0x03010000,
+  intel_gpu_dg1 = 0x03028000,
+  intel_gpu_acm_g10 = 0x030dc008,
   intel_gpu_dg2_g10 = intel_gpu_acm_g10,
-  intel_gpu_acm_g11,
+  intel_gpu_acm_g11 = 0x030e0005,
   intel_gpu_dg2_g11 = intel_gpu_acm_g11,
-  intel_gpu_acm_g12,
+  intel_gpu_acm_g12 = 0x030e4000,
   intel_gpu_dg2_g12 = intel_gpu_acm_g12,
-  intel_gpu_pvc,
-  intel_gpu_pvc_vg,
+  intel_gpu_pvc = 0x030f0007,
+  intel_gpu_pvc_vg = 0x030f4007,
+  //
   // NVIDIA architectures
-  nvidia_gpu_sm_50,
-  nvidia_gpu_sm_52,
-  nvidia_gpu_sm_53,
-  nvidia_gpu_sm_60,
-  nvidia_gpu_sm_61,
-  nvidia_gpu_sm_62,
-  nvidia_gpu_sm_70,
-  nvidia_gpu_sm_72,
-  nvidia_gpu_sm_75,
-  nvidia_gpu_sm_80,
-  nvidia_gpu_sm_86,
-  nvidia_gpu_sm_87,
-  nvidia_gpu_sm_89,
-  nvidia_gpu_sm_90,
+  //
+  // The requirement for the unique ID for nvidia_gpu_* architectures below is:
+  // - the ID must start with NVIDIA's vendor ID from underlied backend (which
+  //   is 0x10de)
+  // - the ID must end with SM version ID of that architecture
+  nvidia_gpu_sm_50 = 0x10de0050,
+  nvidia_gpu_sm_52 = 0x10de0052,
+  nvidia_gpu_sm_53 = 0x10de0053,
+  nvidia_gpu_sm_60 = 0x10de0060,
+  nvidia_gpu_sm_61 = 0x10de0061,
+  nvidia_gpu_sm_62 = 0x10de0062,
+  nvidia_gpu_sm_70 = 0x10de0070,
+  nvidia_gpu_sm_72 = 0x10de0072,
+  nvidia_gpu_sm_75 = 0x10de0075,
+  nvidia_gpu_sm_80 = 0x10de0080,
+  nvidia_gpu_sm_86 = 0x10de0086,
+  nvidia_gpu_sm_87 = 0x10de0087,
+  nvidia_gpu_sm_89 = 0x10de0089,
+  nvidia_gpu_sm_90 = 0x10de0090,
+  //
   // AMD architectures
-  amd_gpu_gfx700,
-  amd_gpu_gfx701,
-  amd_gpu_gfx702,
-  amd_gpu_gfx801,
-  amd_gpu_gfx802,
-  amd_gpu_gfx803,
-  amd_gpu_gfx805,
-  amd_gpu_gfx810,
-  amd_gpu_gfx900,
-  amd_gpu_gfx902,
-  amd_gpu_gfx904,
-  amd_gpu_gfx906,
-  amd_gpu_gfx908,
-  amd_gpu_gfx909,
-  amd_gpu_gfx90a,
-  amd_gpu_gfx90c,
-  amd_gpu_gfx940,
-  amd_gpu_gfx941,
-  amd_gpu_gfx942,
-  amd_gpu_gfx1010,
-  amd_gpu_gfx1011,
-  amd_gpu_gfx1012,
-  amd_gpu_gfx1013,
-  amd_gpu_gfx1030,
-  amd_gpu_gfx1031,
-  amd_gpu_gfx1032,
-  amd_gpu_gfx1033,
-  amd_gpu_gfx1034,
-  amd_gpu_gfx1035,
-  amd_gpu_gfx1036,
-  amd_gpu_gfx1100,
-  amd_gpu_gfx1101,
-  amd_gpu_gfx1102,
-  amd_gpu_gfx1103,
-  amd_gpu_gfx1150,
-  amd_gpu_gfx1151,
-  amd_gpu_gfx1200,
-  amd_gpu_gfx1201,
-  // Update "detail::max_architecture" below if you add new elements here!
+  //
+  // The requirement for the unique ID for amd_gpu_* architectures below is:
+  // - the ID must start with AMD's vendor ID from underlied backend (which is
+  //   0x1002)
+  // - the ID must end with GFX version ID of that architecture
+  amd_gpu_gfx700 = 0x10020700,
+  amd_gpu_gfx701 = 0x10020701,
+  amd_gpu_gfx702 = 0x10020702,
+  amd_gpu_gfx801 = 0x10020801,
+  amd_gpu_gfx802 = 0x10020802,
+  amd_gpu_gfx803 = 0x10020803,
+  amd_gpu_gfx805 = 0x10020805,
+  amd_gpu_gfx810 = 0x10020810,
+  amd_gpu_gfx900 = 0x10020900,
+  amd_gpu_gfx902 = 0x10020902,
+  amd_gpu_gfx904 = 0x10020904,
+  amd_gpu_gfx906 = 0x10020906,
+  amd_gpu_gfx908 = 0x10020908,
+  amd_gpu_gfx909 = 0x10020909,
+  amd_gpu_gfx90a = 0x1002090a,
+  amd_gpu_gfx90c = 0x1002090c,
+  amd_gpu_gfx940 = 0x10020940,
+  amd_gpu_gfx941 = 0x10020941,
+  amd_gpu_gfx942 = 0x10020942,
+  amd_gpu_gfx1010 = 0x10021010,
+  amd_gpu_gfx1011 = 0x10021011,
+  amd_gpu_gfx1012 = 0x10021012,
+  amd_gpu_gfx1013 = 0x10021013,
+  amd_gpu_gfx1030 = 0x10021030,
+  amd_gpu_gfx1031 = 0x10021031,
+  amd_gpu_gfx1032 = 0x10021032,
+  amd_gpu_gfx1033 = 0x10021033,
+  amd_gpu_gfx1034 = 0x10021034,
+  amd_gpu_gfx1035 = 0x10021035,
+  amd_gpu_gfx1036 = 0x10021036,
+  amd_gpu_gfx1100 = 0x10021100,
+  amd_gpu_gfx1101 = 0x10021101,
+  amd_gpu_gfx1102 = 0x10021102,
+  amd_gpu_gfx1103 = 0x10021103,
+  amd_gpu_gfx1150 = 0x10021150,
+  amd_gpu_gfx1151 = 0x10021151,
+  amd_gpu_gfx1200 = 0x10021200,
+  amd_gpu_gfx1201 = 0x10021201,
   intel_gpu_8_0_0 = intel_gpu_bdw,
   intel_gpu_9_0_9 = intel_gpu_skl,
   intel_gpu_9_1_9 = intel_gpu_kbl,
@@ -124,12 +153,37 @@ enum class architecture {
   intel_gpu_12_10_0 = intel_gpu_dg1,
 };
 
+enum class arch_category {
+  intel_gpu = 0,
+  nvidia_gpu = 1,
+  amd_gpu = 2,
+  // TODO: add intel_cpu
+};
+
 } // namespace ext::oneapi::experimental
 
 namespace detail {
 
-static constexpr ext::oneapi::experimental::architecture max_architecture =
-    ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
+static constexpr ext::oneapi::experimental::architecture
+    min_intel_gpu_architecture =
+        ext::oneapi::experimental::architecture::intel_gpu_bdw;
+static constexpr ext::oneapi::experimental::architecture
+    max_intel_gpu_architecture =
+        ext::oneapi::experimental::architecture::intel_gpu_pvc_vg;
+
+static constexpr ext::oneapi::experimental::architecture
+    min_nvidia_gpu_architecture =
+        ext::oneapi::experimental::architecture::nvidia_gpu_sm_50;
+static constexpr ext::oneapi::experimental::architecture
+    max_nvidia_gpu_architecture =
+        ext::oneapi::experimental::architecture::nvidia_gpu_sm_90;
+
+static constexpr ext::oneapi::experimental::architecture
+    min_amd_gpu_architecture =
+        ext::oneapi::experimental::architecture::amd_gpu_gfx700;
+static constexpr ext::oneapi::experimental::architecture
+    max_amd_gpu_architecture =
+        ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
 
 #ifndef __SYCL_TARGET_INTEL_X86_64__
 #define __SYCL_TARGET_INTEL_X86_64__ 0
@@ -439,167 +493,247 @@ static constexpr bool is_allowable_aot_mode =
     (__SYCL_TARGET_AMD_GPU_GFX1200__ == 1) ||
     (__SYCL_TARGET_AMD_GPU_GFX1201__ == 1);
 
-struct IsAOTForArchitectureClass {
-  // Allocate an array of size == size of
-  // ext::oneapi::experimental::architecture enum.
-  bool arr[static_cast<int>(max_architecture) + 1];
-
-  using arch = ext::oneapi::experimental::architecture;
-
-  constexpr IsAOTForArchitectureClass() : arr() {
-    arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_bdw)] =
-        __SYCL_TARGET_INTEL_GPU_BDW__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_skl)] =
-        __SYCL_TARGET_INTEL_GPU_SKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_kbl)] =
-        __SYCL_TARGET_INTEL_GPU_KBL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cfl)] =
-        __SYCL_TARGET_INTEL_GPU_CFL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_apl)] =
-        __SYCL_TARGET_INTEL_GPU_APL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_glk)] =
-        __SYCL_TARGET_INTEL_GPU_GLK__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_whl)] =
-        __SYCL_TARGET_INTEL_GPU_WHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_aml)] =
-        __SYCL_TARGET_INTEL_GPU_AML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cml)] =
-        __SYCL_TARGET_INTEL_GPU_CML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_icllp)] =
-        __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_ehl)] =
-        __SYCL_TARGET_INTEL_GPU_EHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_tgllp)] =
-        __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_rkl)] =
-        __SYCL_TARGET_INTEL_GPU_RKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_s)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_p)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_n)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_dg1)] =
-        __SYCL_TARGET_INTEL_GPU_DG1__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g10)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g11)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g12)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_pvc)] =
-        __SYCL_TARGET_INTEL_GPU_PVC__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_pvc_vg)] =
-        __SYCL_TARGET_INTEL_GPU_PVC_VG__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_50)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM50__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_52)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM52__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_53)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM53__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_60)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM60__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_61)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM61__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_62)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM62__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_70)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM70__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_72)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM72__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_75)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM75__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_80)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM80__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_86)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM86__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_87)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM87__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_89)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM89__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_90)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM90__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx700)] =
-        __SYCL_TARGET_AMD_GPU_GFX700__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx701)] =
-        __SYCL_TARGET_AMD_GPU_GFX701__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx702)] =
-        __SYCL_TARGET_AMD_GPU_GFX702__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx801)] =
-        __SYCL_TARGET_AMD_GPU_GFX801__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx802)] =
-        __SYCL_TARGET_AMD_GPU_GFX802__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx803)] =
-        __SYCL_TARGET_AMD_GPU_GFX803__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx805)] =
-        __SYCL_TARGET_AMD_GPU_GFX805__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx810)] =
-        __SYCL_TARGET_AMD_GPU_GFX810__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx900)] =
-        __SYCL_TARGET_AMD_GPU_GFX900__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx902)] =
-        __SYCL_TARGET_AMD_GPU_GFX902__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx904)] =
-        __SYCL_TARGET_AMD_GPU_GFX904__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx906)] =
-        __SYCL_TARGET_AMD_GPU_GFX906__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx908)] =
-        __SYCL_TARGET_AMD_GPU_GFX908__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx909)] =
-        __SYCL_TARGET_AMD_GPU_GFX909__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx90a)] =
-        __SYCL_TARGET_AMD_GPU_GFX90A__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx90c)] =
-        __SYCL_TARGET_AMD_GPU_GFX90C__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx940)] =
-        __SYCL_TARGET_AMD_GPU_GFX940__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx941)] =
-        __SYCL_TARGET_AMD_GPU_GFX941__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx942)] =
-        __SYCL_TARGET_AMD_GPU_GFX942__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1010)] =
-        __SYCL_TARGET_AMD_GPU_GFX1010__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1011)] =
-        __SYCL_TARGET_AMD_GPU_GFX1011__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1012)] =
-        __SYCL_TARGET_AMD_GPU_GFX1012__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1030)] =
-        __SYCL_TARGET_AMD_GPU_GFX1030__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1031)] =
-        __SYCL_TARGET_AMD_GPU_GFX1031__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1032)] =
-        __SYCL_TARGET_AMD_GPU_GFX1032__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1033)] =
-        __SYCL_TARGET_AMD_GPU_GFX1033__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1034)] =
-        __SYCL_TARGET_AMD_GPU_GFX1034__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1035)] =
-        __SYCL_TARGET_AMD_GPU_GFX1035__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1036)] =
-        __SYCL_TARGET_AMD_GPU_GFX1036__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1100)] =
-        __SYCL_TARGET_AMD_GPU_GFX1100__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1101)] =
-        __SYCL_TARGET_AMD_GPU_GFX1101__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1102)] =
-        __SYCL_TARGET_AMD_GPU_GFX1102__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1103)] =
-        __SYCL_TARGET_AMD_GPU_GFX1103__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1150)] =
-        __SYCL_TARGET_AMD_GPU_GFX1150__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1151)] =
-        __SYCL_TARGET_AMD_GPU_GFX1151__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1200)] =
-        __SYCL_TARGET_AMD_GPU_GFX1200__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1201)] =
-        __SYCL_TARGET_AMD_GPU_GFX1201__ == 1;
-  }
-};
+constexpr static std::optional<ext::oneapi::experimental::architecture>
+get_current_architecture_aot() {
+  // TODO: re-write the logic below when sycl_ext_oneapi_device_architecture will
+  // support -fsycl-targets will targets more than one
+#if __SYCL_TARGET_INTEL_X86_64__
+  return ext::oneapi::experimental::architecture::x86_64;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_BDW__
+  return ext::oneapi::experimental::architecture::intel_gpu_bdw;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_SKL__
+  return ext::oneapi::experimental::architecture::intel_gpu_skl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_KBL__
+  return ext::oneapi::experimental::architecture::intel_gpu_kbl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_CFL__
+  return ext::oneapi::experimental::architecture::intel_gpu_cfl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_APL__
+  return ext::oneapi::experimental::architecture::intel_gpu_apl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_GLK__
+  return ext::oneapi::experimental::architecture::intel_gpu_glk;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_WHL__
+  return ext::oneapi::experimental::architecture::intel_gpu_whl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_AML__
+  return ext::oneapi::experimental::architecture::intel_gpu_aml;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_CML__
+  return ext::oneapi::experimental::architecture::intel_gpu_cml;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ICLLP__
+  return ext::oneapi::experimental::architecture::intel_gpu_icllp;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_EHL__
+  return ext::oneapi::experimental::architecture::intel_gpu_ehl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_TGLLP__
+  return ext::oneapi::experimental::architecture::intel_gpu_tgllp;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_RKL__
+  return ext::oneapi::experimental::architecture::intel_gpu_rkl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_S__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_s;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_P__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_p;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_P__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_p;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_N__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_n;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_DG1__
+  return ext::oneapi::experimental::architecture::intel_gpu_dg1;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G10__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g10;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G11__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g11;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G12__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g12;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_PVC__
+  return ext::oneapi::experimental::architecture::intel_gpu_pvc;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_PVC_VG__
+  return ext::oneapi::experimental::architecture::intel_gpu_pvc_vg;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM50__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_50;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM52__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_52;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM53__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_53;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM60__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_60;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM61__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_61;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM62__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_62;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM70__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_70;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM72__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_72;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM75__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_75;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM80__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_80;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM86__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_86;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM87__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_87;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM89__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_89;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM90__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_90;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX700__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx700;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX701__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx701;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX702__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx702;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX801__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx801;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX802__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx802;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX803__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx803;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX805__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx805;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX810__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx810;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX900__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx900;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX902__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx902;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX904__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx904;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX906__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx906;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX908__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx908;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX909__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx909;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX90a__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx90a;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX90c__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx90c;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX940__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx940;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX941__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx941;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX942__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx942;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1010__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1010;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1011__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1011;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1012__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1012;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1030__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1030;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1031__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1031;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1032__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1032;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1033__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1033;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1034__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1034;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1035__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1035;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1036__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1036;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1100__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1100;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1101__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1101;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1102__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1102;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1103__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1103;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1150__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1150;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1151__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1151;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1200__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1200;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1201__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
+#endif
+  return std::nullopt;
+}
 
-// One entry for each enumerator in "architecture" telling whether the AOT
-// target matches that architecture.
-static constexpr IsAOTForArchitectureClass is_aot_for_architecture;
+// Tells if the AOT target matches that architecture.
+constexpr static bool
+is_aot_for_architecture(ext::oneapi::experimental::architecture arch) {
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+  if (current_arch.has_value())
+    return static_cast<int>(arch) == static_cast<int>(*current_arch);
+  return false;
+}
 
 // Reads the value of "is_allowable_aot_mode" via a template to defer triggering
 // static_assert() until template instantiation time.
@@ -612,28 +746,249 @@ constexpr static bool allowable_aot_mode() {
 // pack.
 template <ext::oneapi::experimental::architecture... Archs>
 constexpr static bool device_architecture_is() {
-  return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...);
+  return (is_aot_for_architecture(Archs) || ...);
+}
+
+static constexpr std::optional<ext::oneapi::experimental::architecture>
+get_category_min_architecture(
+    ext::oneapi::experimental::arch_category Category) {
+  if (Category == ext::oneapi::experimental::arch_category::intel_gpu) {
+    return min_intel_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) {
+    return min_nvidia_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) {
+    return min_amd_gpu_architecture;
+  } // add "else if " when adding new category, "else" not needed
+  return std::nullopt;
+}
+
+static constexpr std::optional<ext::oneapi::experimental::architecture>
+get_category_max_architecture(
+    ext::oneapi::experimental::arch_category Category) {
+  if (Category == ext::oneapi::experimental::arch_category::intel_gpu) {
+    return max_intel_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) {
+    return max_nvidia_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) {
+    return max_amd_gpu_architecture;
+  } // add "else if " when adding new category, "else" not needed
+  return std::nullopt;
+}
+
+template <ext::oneapi::experimental::arch_category Category>
+constexpr static bool device_architecture_is_in_category_aot() {
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      category_min_arch = get_category_min_architecture(Category);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      category_max_arch = get_category_max_architecture(Category);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+
+  if (category_min_arch.has_value() && category_max_arch.has_value() &&
+      current_arch.has_value())
+    if ((static_cast<int>(*category_min_arch) <=
+         static_cast<int>(*current_arch)) &&
+        (static_cast<int>(*current_arch) <=
+         static_cast<int>(*category_max_arch)))
+      return true;
+
+  return false;
+}
+
+template <ext::oneapi::experimental::arch_category... Categories>
+constexpr static bool device_architecture_is_in_categories() {
+  return (device_architecture_is_in_category_aot<Categories>() || ...);
 }
 
-// Helper object used to implement "else_if_architecture_is" and "otherwise".
-// The "MakeCall" template parameter tells whether a previous clause in the
-// "if-elseif-elseif ..." chain was true.  When "MakeCall" is false, some
-// previous clause was true, so none of the subsequent
-// "else_if_architecture_is" or "otherwise" member functions should call the
-// user's function.
+constexpr static std::optional<ext::oneapi::experimental::arch_category>
+get_architecture_category(ext::oneapi::experimental::architecture arch) {
+  auto arch_is_in_segment =
+      [&arch](ext::oneapi::experimental::architecture min,
+              ext::oneapi::experimental::architecture max) {
+        if ((static_cast<int>(min) <= static_cast<int>(arch)) &&
+            (static_cast<int>(arch) <= static_cast<int>(max)))
+          return true;
+        return false;
+      };
+
+  if (arch_is_in_segment(min_intel_gpu_architecture,
+                         max_intel_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::intel_gpu;
+  if (arch_is_in_segment(min_nvidia_gpu_architecture,
+                         max_nvidia_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::nvidia_gpu;
+  if (arch_is_in_segment(min_amd_gpu_architecture, max_amd_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::amd_gpu;
+  // add "if " when adding new category
+
+  return std::nullopt;
+}
+
+template <ext::oneapi::experimental::architecture Arch, typename Compare>
+constexpr static bool device_architecture_comparison_aot(Compare comp) {
+  constexpr std::optional<ext::oneapi::experimental::arch_category>
+      input_arch_category = get_architecture_category(Arch);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+
+  if (input_arch_category.has_value() && current_arch.has_value()) {
+    std::optional<ext::oneapi::experimental::arch_category>
+        current_arch_category = get_architecture_category(*current_arch);
+    if (current_arch_category.has_value() &&
+        (*input_arch_category == *current_arch_category))
+      return comp(*current_arch, Arch);
+  }
+  return false;
+}
+
+// Helper object used to implement "else_if_architecture_is",
+// "else_if_architecture_is_*" and "otherwise". The "MakeCall" template
+// parameter tells whether a previous clause in the "if-elseif-elseif ..." chain
+// was true.  When "MakeCall" is false, some previous clause was true, so none
+// of the subsequent "else_if_architecture_is", "else_if_architecture_is_*" or
+// "otherwise" member functions should call the user's function.
 template <bool MakeCall> class if_architecture_helper {
 public:
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is` function has any one of the architectures
+  /// listed in the @tparam Archs parameter pack.
   template <ext::oneapi::experimental::architecture... Archs, typename T>
-  constexpr auto else_if_architecture_is(T fnTrue) {
+  constexpr auto else_if_architecture_is(T fn) {
     if constexpr (MakeCall && device_architecture_is<Archs...>()) {
-      fnTrue();
+      fn();
       return if_architecture_helper<false>{};
     } else {
-      (void)fnTrue;
+      (void)fn;
       return if_architecture_helper<MakeCall>{};
     }
   }
 
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is` function has an architecture that is in any
+  /// one of the categories listed in the @tparam Categories pack.
+  template <ext::oneapi::experimental::arch_category... Categories, typename T>
+  constexpr auto else_if_architecture_is(T fn) {
+    if constexpr (MakeCall &&
+                  device_architecture_is_in_categories<Categories...>()) {
+      fn();
+      return if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_lt` function has an architecture that is in
+  /// the same family as @tparam Arch and compares less than @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_lt(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      [](ext::oneapi::experimental::architecture a,
+                         ext::oneapi::experimental::architecture b) {
+                        return a < b;
+                      })) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_le` function has an architecture that is in
+  /// the same family as @tparam Arch and compares less than or equal to @tparam
+  /// Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_le(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      [](ext::oneapi::experimental::architecture a,
+                         ext::oneapi::experimental::architecture b) {
+                        return a <= b;
+                      })) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_gt` function has an architecture that is in
+  /// the same family as @tparam Arch and compares greater than @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_gt(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      [](ext::oneapi::experimental::architecture a,
+                         ext::oneapi::experimental::architecture b) {
+                        return a > b;
+                      })) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_ge` function has an architecture that is in
+  /// the same family as @tparam Arch and compares greater than or equal to
+  /// @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_ge(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      [](ext::oneapi::experimental::architecture a,
+                         ext::oneapi::experimental::architecture b) {
+                        return a >= b;
+                      })) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_between` function has an architecture that is
+  /// in the same family as @tparam Arch1 and is greater than or equal to
+  /// @tparam Arch1 and is less than or equal to @tparam Arch2.
+  template <ext::oneapi::experimental::architecture Arch1,
+            ext::oneapi::experimental::architecture Arch2, typename T>
+  constexpr auto else_if_architecture_is_between(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch1>(
+                      [](ext::oneapi::experimental::architecture a,
+                         ext::oneapi::experimental::architecture b) {
+                        return a >= b;
+                      }) &&
+                  sycl::detail::device_architecture_comparison_aot<Arch2>(
+                      [](ext::oneapi::experimental::architecture a,
+                         ext::oneapi::experimental::architecture b) {
+                        return a <= b;
+                      })) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
   template <typename T> constexpr void otherwise(T fn) {
     if constexpr (MakeCall) {
       fn();
@@ -644,17 +999,113 @@ template <bool MakeCall> class if_architecture_helper {
 
 namespace ext::oneapi::experimental {
 
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is` function has any one of the architectures listed in the
+/// @tparam Archs pack.
 template <architecture... Archs, typename T>
-constexpr static auto if_architecture_is(T fnTrue) {
+constexpr static auto if_architecture_is(T fn) {
   static_assert(sycl::detail::allowable_aot_mode<Archs...>(),
                 "The if_architecture_is function may only be used when AOT "
                 "compiling with '-fsycl-targets=spir64_x86_64' or "
                 "'-fsycl-targets=*_gpu_*'");
   if constexpr (sycl::detail::device_architecture_is<Archs...>()) {
-    fnTrue();
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is` function has an architecture that is in any one of the
+/// categories listed in the @tparam Categories pack.
+template <arch_category... Categories, typename T>
+constexpr static auto if_architecture_is(T fn) {
+  if constexpr (sycl::detail::device_architecture_is_in_categories<
+                    Categories...>()) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_lt` function has an architecture that is in the same
+/// family as @tparam Arch and compares less than @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_lt(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    [](architecture a, architecture b) { return a < b; })) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_le` function has an architecture that is in the same
+/// family as @tparam Arch and compares less than or equal to @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_le(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    [](architecture a, architecture b) { return a <= b; })) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_gt` function has an architecture that is in the same
+/// family as @tparam Arch and compares greater than @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_gt(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    [](architecture a, architecture b) { return a > b; })) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_ge` function has an architecture that is in the same
+/// family as @tparam Arch and compares greater than or equal to @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_ge(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    [](architecture a, architecture b) { return a >= b; })) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_between` function has an architecture that is in the
+/// same family as @tparam Arch1 and is greater than or equal to @tparam
+/// Arch1 and is less than or equal to @tparam Arch2.
+template <architecture Arch1, architecture Arch2, typename T>
+constexpr static auto if_architecture_is_between(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch1>(
+                    [](architecture a, architecture b) { return a >= b; }) &&
+                sycl::detail::device_architecture_comparison_aot<Arch2>(
+                    [](architecture a, architecture b) { return a <= b; })) {
+    fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
-    (void)fnTrue;
+    (void)fn;
     return sycl::detail::if_architecture_helper<true>{};
   }
 }
diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp
index 8ecaa6a2bfc62..7771799ff7db7 100644
--- a/sycl/source/detail/device_impl.hpp
+++ b/sycl/source/detail/device_impl.hpp
@@ -12,6 +12,7 @@
 #include <sycl/aspects.hpp>
 #include <sycl/detail/cl.h>
 #include <sycl/detail/pi.hpp>
+#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
 #include <sycl/kernel_bundle.hpp>
 #include <sycl/stl.hpp>
 
@@ -239,6 +240,18 @@ class device_impl {
     return Arch == getDeviceArch();
   }
 
+  bool extOneapiArchitectureIs(
+      ext::oneapi::experimental::arch_category Category) const {
+    std::optional<ext::oneapi::experimental::architecture> CategoryMinArch =
+        get_category_min_architecture(Category);
+    std::optional<ext::oneapi::experimental::architecture> CategoryMaxArch =
+        get_category_max_architecture(Category);
+    if (CategoryMinArch.has_value() && CategoryMaxArch.has_value())
+      return CategoryMinArch <= getDeviceArch() &&
+             getDeviceArch() <= CategoryMaxArch;
+    return false;
+  }
+
   /// Gets the current device timestamp
   /// @throw sycl::feature_not_supported if feature is not supported on device
   uint64_t getCurrentDeviceTime();
diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp
index 73a05080c5b0d..0d39bc825d82b 100644
--- a/sycl/source/device.cpp
+++ b/sycl/source/device.cpp
@@ -257,10 +257,16 @@ bool device::ext_oneapi_can_access_peer(const device &peer,
 
   return value == 1;
 }
+
 bool device::ext_oneapi_architecture_is(
     ext::oneapi::experimental::architecture arch) {
   return impl->extOneapiArchitectureIs(arch);
 }
 
+bool device::ext_oneapi_architecture_is(
+    ext::oneapi::experimental::arch_category category) {
+  return impl->extOneapiArchitectureIs(category);
+}
+
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
new file mode 100644
index 0000000000000..cb1214395f8e4
--- /dev/null
+++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
@@ -0,0 +1,199 @@
+// REQUIRES: gpu, gpu-intel-pvc
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc %s -o %t.out
+// RUN: %{run} %t.out
+
+#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::oneapi::experimental;
+
+int main() {
+  int *result;
+  int N = 29;
+
+  {
+    queue q(gpu_selector_v);
+
+    result = (int *)malloc_shared(N * sizeof(int), q);
+
+    q.submit([&](handler &cgh) { cgh.fill(result, 0, N); }).wait();
+
+    q.submit([&](handler &cgh) {
+      cgh.single_task([=]() {
+        // test if_architecture_is with category
+        if_architecture_is<arch_category::intel_gpu>([&]() {
+          result[0] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is with category
+        if_architecture_is<arch_category::nvidia_gpu>([&]() {}).otherwise(
+            [&]() { result[1] = 1; });
+
+        // test else_if_architecture_is with category - 1
+        if_architecture_is<arch_category::nvidia_gpu>([&]() {})
+            .else_if_architecture_is<arch_category::intel_gpu>(
+                [&]() { result[2] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is with category - 2
+        if_architecture_is<arch_category::nvidia_gpu>([&]() {})
+            .else_if_architecture_is<arch_category::amd_gpu>([&]() {})
+            .else_if_architecture_is<arch_category::intel_gpu>(
+                [&]() { result[3] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_lt
+        if_architecture_is_lt<architecture::intel_gpu_pvc_vg>([&]() {
+          result[4] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_lt - 1
+        if_architecture_is_lt<architecture::intel_gpu_pvc>([&]() {}).otherwise(
+            [&]() { result[5] = 1; });
+
+        // negative test if_architecture_is_lt - 2
+        if_architecture_is_lt<architecture::nvidia_gpu_sm_89>([&]() {
+        }).otherwise([&]() { result[6] = 1; });
+
+        // test else_if_architecture_is_lt - 1
+        if_architecture_is_lt<architecture::intel_gpu_bdw>([&]() {})
+            .else_if_architecture_is_lt<architecture::intel_gpu_pvc_vg>(
+                [&]() { result[7] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_lt - 2
+        if_architecture_is_lt<architecture::intel_gpu_bdw>([&]() {
+        }).else_if_architecture_is_lt<architecture::nvidia_gpu_sm_89>([&]() {})
+            .else_if_architecture_is_lt<architecture::intel_gpu_pvc_vg>(
+                [&]() { result[8] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_le
+        if_architecture_is_le<architecture::intel_gpu_pvc>([&]() {
+          result[9] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_le - 1
+        if_architecture_is_le<architecture::intel_gpu_skl>([&]() {}).otherwise(
+            [&]() { result[10] = 1; });
+
+        // negative test if_architecture_is_le - 2
+        if_architecture_is_le<architecture::nvidia_gpu_sm_89>([&]() {
+        }).otherwise([&]() { result[11] = 1; });
+
+        // test else_if_architecture_is_le - 1
+        if_architecture_is_le<architecture::intel_gpu_skl>([&]() {})
+            .else_if_architecture_is_le<architecture::intel_gpu_pvc>(
+                [&]() { result[12] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_le - 2
+        if_architecture_is_le<architecture::intel_gpu_skl>([&]() {
+        }).else_if_architecture_is_le<architecture::nvidia_gpu_sm_89>([&]() {})
+            .else_if_architecture_is_le<architecture::intel_gpu_pvc>(
+                [&]() { result[13] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_gt
+        if_architecture_is_gt<architecture::intel_gpu_skl>([&]() {
+          result[14] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_gt - 1
+        if_architecture_is_gt<architecture::intel_gpu_pvc_vg>([&]() {
+        }).otherwise([&]() { result[15] = 1; });
+
+        // negative test if_architecture_is_gt - 2
+        if_architecture_is_gt<architecture::nvidia_gpu_sm_89>([&]() {
+        }).otherwise([&]() { result[16] = 1; });
+
+        // test else_if_architecture_is_gt - 1
+        if_architecture_is_gt<architecture::intel_gpu_pvc_vg>([&]() {})
+            .else_if_architecture_is_gt<architecture::intel_gpu_skl>(
+                [&]() { result[17] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_gt - 2
+        if_architecture_is_gt<architecture::intel_gpu_pvc_vg>([&]() {
+        }).else_if_architecture_is_gt<architecture::nvidia_gpu_sm_89>([&]() {})
+            .else_if_architecture_is_gt<architecture::intel_gpu_skl>(
+                [&]() { result[18] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_ge
+        if_architecture_is_ge<architecture::intel_gpu_pvc>([&]() {
+          result[19] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_ge - 1
+        if_architecture_is_ge<architecture::intel_gpu_pvc_vg>([&]() {
+        }).otherwise([&]() { result[20] = 1; });
+
+        // negative test if_architecture_is_ge - 2
+        if_architecture_is_ge<architecture::nvidia_gpu_sm_89>([&]() {
+        }).otherwise([&]() { result[21] = 1; });
+
+        // test else_if_architecture_is_ge - 1
+        if_architecture_is_ge<architecture::intel_gpu_pvc_vg>([&]() {})
+            .else_if_architecture_is_ge<architecture::intel_gpu_pvc>(
+                [&]() { result[22] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_ge - 2
+        if_architecture_is_ge<architecture::intel_gpu_pvc_vg>([&]() {
+        }).else_if_architecture_is_ge<architecture::nvidia_gpu_sm_89>([&]() {})
+            .else_if_architecture_is_ge<architecture::intel_gpu_pvc>(
+                [&]() { result[23] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_between
+        if_architecture_is_between<architecture::intel_gpu_skl,
+                                   architecture::intel_gpu_pvc_vg>([&]() {
+          result[24] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_between - 1
+        if_architecture_is_between<architecture::intel_gpu_dg1,
+                                   architecture::intel_gpu_dg2_g12>([&]() {
+        }).otherwise([&]() { result[25] = 1; });
+
+        // negative test if_architecture_is_between - 2
+        if_architecture_is_between<architecture::intel_gpu_dg1,
+                                   architecture::nvidia_gpu_sm_89>([&]() {
+        }).otherwise([&]() { result[26] = 1; });
+
+        // test else_if_architecture_is_between - 1
+        if_architecture_is_between<architecture::intel_gpu_dg1,
+                                   architecture::intel_gpu_dg2_g12>([&]() {})
+            .else_if_architecture_is_between<architecture::intel_gpu_skl,
+                                             architecture::intel_gpu_pvc>(
+                [&]() { result[27] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_between - 2
+        if_architecture_is_between<architecture::intel_gpu_dg1,
+                                   architecture::intel_gpu_dg2_g12>([&]() {})
+            .else_if_architecture_is_between<architecture::intel_gpu_dg1,
+                                             architecture::nvidia_gpu_sm_89>(
+                [&]() {})
+            .else_if_architecture_is_between<architecture::intel_gpu_skl,
+                                             architecture::intel_gpu_pvc>(
+                [&]() { result[28] = 1; })
+            .otherwise([&]() {});
+
+        // if adding new test here, don't forget to increment result's index and
+        // value of N variable
+      });
+    });
+  }
+
+  bool failed = false;
+  for (int i = 0; i < N; i++)
+    if (result[i] != 1) {
+      std::cout << "Verification of the test " << i << " failed." << std::endl;
+      failed = true;
+    }
+
+  return failed;
+}
diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp
new file mode 100644
index 0000000000000..eafbc0f2aadad
--- /dev/null
+++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp
@@ -0,0 +1,42 @@
+// REQUIRES: gpu
+
+// This test is written only for Intel architectures. It is expected that this
+// test will fail on NVIDIA and AMD as the checks for ext_oneapi_architecture_is
+// host API expect that device architecture is Intel GPU
+// UNSUPPORTED: cuda, hip
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <sycl/sycl.hpp>
+
+namespace syclex = sycl::ext::oneapi::experimental;
+
+int main() {
+  sycl::queue q;
+  sycl::device dev = q.get_device();
+
+  assert(dev.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu));
+  assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::nvidia_gpu));
+  assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::amd_gpu));
+
+  syclex::architecture intel_gpu_arch = syclex::architecture::intel_gpu_ehl;
+  assert(intel_gpu_arch < syclex::architecture::intel_gpu_pvc);
+  assert(intel_gpu_arch <= syclex::architecture::intel_gpu_pvc);
+  assert(intel_gpu_arch > syclex::architecture::intel_gpu_skl);
+  assert(intel_gpu_arch >= syclex::architecture::intel_gpu_ehl);
+
+  syclex::architecture nvidia_gpu_arch = syclex::architecture::nvidia_gpu_sm_70;
+  assert(nvidia_gpu_arch < syclex::architecture::nvidia_gpu_sm_80);
+  assert(nvidia_gpu_arch <= syclex::architecture::nvidia_gpu_sm_80);
+  assert(nvidia_gpu_arch > syclex::architecture::nvidia_gpu_sm_53);
+  assert(nvidia_gpu_arch >= syclex::architecture::nvidia_gpu_sm_70);
+
+  syclex::architecture amd_gpu_arch = syclex::architecture::amd_gpu_gfx908;
+  assert(amd_gpu_arch < syclex::architecture::amd_gpu_gfx1031);
+  assert(amd_gpu_arch <= syclex::architecture::amd_gpu_gfx1031);
+  assert(amd_gpu_arch > syclex::architecture::amd_gpu_gfx810);
+  assert(amd_gpu_arch >= syclex::architecture::amd_gpu_gfx908);
+
+  return 0;
+}

From cce8b746819f54fc7784920ebcc8a8c5972a28d8 Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Sat, 17 Feb 2024 10:31:20 -0800
Subject: [PATCH 02/11] Cleanup

---
 .../experimental/device_architecture.hpp      | 21 ++++++++-----------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index dfc2de8665960..6e76be4dad8c6 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -495,8 +495,8 @@ static constexpr bool is_allowable_aot_mode =
 
 constexpr static std::optional<ext::oneapi::experimental::architecture>
 get_current_architecture_aot() {
-  // TODO: re-write the logic below when sycl_ext_oneapi_device_architecture will
-  // support -fsycl-targets will targets more than one
+  // TODO: re-write the logic below when sycl_ext_oneapi_device_architecture
+  // will support targets more than one in -fsycl-targets
 #if __SYCL_TARGET_INTEL_X86_64__
   return ext::oneapi::experimental::architecture::x86_64;
 #endif
@@ -731,7 +731,7 @@ is_aot_for_architecture(ext::oneapi::experimental::architecture arch) {
   constexpr std::optional<ext::oneapi::experimental::architecture>
       current_arch = get_current_architecture_aot();
   if (current_arch.has_value())
-    return static_cast<int>(arch) == static_cast<int>(*current_arch);
+    return arch == *current_arch;
   return false;
 }
 
@@ -786,10 +786,8 @@ constexpr static bool device_architecture_is_in_category_aot() {
 
   if (category_min_arch.has_value() && category_max_arch.has_value() &&
       current_arch.has_value())
-    if ((static_cast<int>(*category_min_arch) <=
-         static_cast<int>(*current_arch)) &&
-        (static_cast<int>(*current_arch) <=
-         static_cast<int>(*category_max_arch)))
+    if ((*category_min_arch <= *current_arch) &&
+        (*current_arch <= *category_max_arch))
       return true;
 
   return false;
@@ -801,12 +799,11 @@ constexpr static bool device_architecture_is_in_categories() {
 }
 
 constexpr static std::optional<ext::oneapi::experimental::arch_category>
-get_architecture_category(ext::oneapi::experimental::architecture arch) {
+get_device_architecture_category(ext::oneapi::experimental::architecture arch) {
   auto arch_is_in_segment =
       [&arch](ext::oneapi::experimental::architecture min,
               ext::oneapi::experimental::architecture max) {
-        if ((static_cast<int>(min) <= static_cast<int>(arch)) &&
-            (static_cast<int>(arch) <= static_cast<int>(max)))
+        if ((min <= arch) && (arch <= max))
           return true;
         return false;
       };
@@ -827,13 +824,13 @@ get_architecture_category(ext::oneapi::experimental::architecture arch) {
 template <ext::oneapi::experimental::architecture Arch, typename Compare>
 constexpr static bool device_architecture_comparison_aot(Compare comp) {
   constexpr std::optional<ext::oneapi::experimental::arch_category>
-      input_arch_category = get_architecture_category(Arch);
+      input_arch_category = get_device_architecture_category(Arch);
   constexpr std::optional<ext::oneapi::experimental::architecture>
       current_arch = get_current_architecture_aot();
 
   if (input_arch_category.has_value() && current_arch.has_value()) {
     std::optional<ext::oneapi::experimental::arch_category>
-        current_arch_category = get_architecture_category(*current_arch);
+        current_arch_category = get_device_architecture_category(*current_arch);
     if (current_arch_category.has_value() &&
         (*input_arch_category == *current_arch_category))
       return comp(*current_arch, Arch);

From a85f2620042d83234ffb4e693ad9bdac7539f63f Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Sat, 17 Feb 2024 11:39:27 -0800
Subject: [PATCH 03/11] Update ABI tests

---
 sycl/test/abi/sycl_symbols_linux.dump   |  1 +
 sycl/test/abi/sycl_symbols_windows.dump | 71 +++++++++++++------------
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index edc3d9b41a00d..f11795e9f7887 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -4058,6 +4058,7 @@ _ZN4sycl3_V16detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_imp
 _ZN4sycl3_V16detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE
 _ZN4sycl3_V16device11get_devicesENS0_4info11device_typeE
 _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental12architectureE
+_ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental13arch_categoryE
 _ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE
 _ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_
 _ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_
diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump
index 98be6da4c5f37..dbc384d50ff5e 100644
--- a/sycl/test/abi/sycl_symbols_windows.dump
+++ b/sycl/test/abi/sycl_symbols_windows.dump
@@ -1012,6 +1012,7 @@
 ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z
 ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z
 ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z
+?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z
 ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z
 ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXXZ
 ?ext_oneapi_can_access_peer@device@_V1@sycl@@QEAA_NAEBV123@W4peer_access@oneapi@ext@23@@Z
@@ -1278,38 +1279,38 @@
 ?get_max_statement_size@stream@_V1@sycl@@QEBA_KXZ
 ?get_max_statement_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ
 ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVdevice@45@AEBVcontext@45@@Z
-?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z
-?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z
-?get_name@kernel_id@_V1@sycl@@QEBAPEBDXZ
-?get_node_from_event@node@experimental@oneapi@ext@_V1@sycl@@SA?AV123456@Vevent@56@@Z
-?get_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
-?get_num_channels@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAIXZ
-?get_pipe_name@pipe_base@experimental@intel@ext@_V1@sycl@@KA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBX@Z
-?get_pitch@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$01@34@XZ
+?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z
+?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z
+?get_name@kernel_id@_V1@sycl@@QEBAPEBDXZ
+?get_node_from_event@node@experimental@oneapi@ext@_V1@sycl@@SA?AV123456@Vevent@56@@Z
+?get_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
+?get_num_channels@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAIXZ
+?get_pipe_name@pipe_base@experimental@intel@ext@_V1@sycl@@KA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBX@Z
+?get_pitch@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$01@34@XZ
 ?get_pitch@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$01@34@XZ
 ?get_platform@context@_V1@sycl@@QEBA?AVplatform@23@XZ
 ?get_platform@device@_V1@sycl@@QEBA?AVplatform@23@XZ
 ?get_platforms@platform@_V1@sycl@@SA?AV?$vector@Vplatform@_V1@sycl@@V?$allocator@Vplatform@_V1@sycl@@@std@@@std@@XZ
-?get_pointer_device@_V1@sycl@@YA?AVdevice@12@PEBXAEBVcontext@12@@Z
-?get_pointer_type@_V1@sycl@@YA?AW4alloc@usm@12@PEBXAEBVcontext@12@@Z
-?get_precision@stream@_V1@sycl@@QEBA_KXZ
-?get_predecessors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
-?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ
-?get_range@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$02@34@XZ
-?get_range@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$range@$02@56@XZ
-?get_range@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$02@34@XZ
-?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
-?get_size@image_plain@detail@_V1@sycl@@IEBA_KXZ
-?get_size@stream@_V1@sycl@@QEBA_KXZ
-?get_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ
-?get_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBAXPEBDPEAX@Z
-?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ
-?get_successors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
-?get_type@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4image_type@23456@XZ
-?get_type@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4node_type@23456@XZ
-?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ
-?get_width@stream@_V1@sycl@@QEBA_KXZ
-?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ
+?get_pointer_device@_V1@sycl@@YA?AVdevice@12@PEBXAEBVcontext@12@@Z
+?get_pointer_type@_V1@sycl@@YA?AW4alloc@usm@12@PEBXAEBVcontext@12@@Z
+?get_precision@stream@_V1@sycl@@QEBA_KXZ
+?get_predecessors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
+?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ
+?get_range@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$02@34@XZ
+?get_range@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$range@$02@56@XZ
+?get_range@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$02@34@XZ
+?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
+?get_size@image_plain@detail@_V1@sycl@@IEBA_KXZ
+?get_size@stream@_V1@sycl@@QEBA_KXZ
+?get_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ
+?get_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBAXPEBDPEAX@Z
+?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ
+?get_successors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
+?get_type@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4image_type@23456@XZ
+?get_type@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AW4node_type@23456@XZ
+?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ
+?get_width@stream@_V1@sycl@@QEBA_KXZ
+?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ
 ?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ
 ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z
 ?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXPEAX@Z@std@@_K_N@Z
@@ -1493,13 +1494,13 @@
 ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z
 ?setPitches@image_impl@detail@_V1@sycl@@AEAAXAEBV?$range@$01@34@@Z
 ?setPitches@image_impl@detail@_V1@sycl@@AEAAXXZ
-?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ
-?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ
-?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z
-?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z
-?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAX$$T@Z
-?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z
-?set_final_data_from_storage@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ
+?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ
+?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ
+?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z
+?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z
+?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAX$$T@Z
+?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z
+?set_final_data_from_storage@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ
 ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z
 ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXXZ
 ?set_final_data_internal@image_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z

From 584a6f94dbdaf8b1507030f47edde52e1064dc60 Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Sun, 18 Feb 2024 15:22:16 -0800
Subject: [PATCH 04/11] =?UTF-8?q?Fix=20'error:=20expression=20=E2=80=98<la?=
 =?UTF-8?q?mbda>=E2=80=99=20is=20not=20a=20constant=20expression'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../experimental/device_architecture.hpp      | 36 ++++++++++++-------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index 6e76be4dad8c6..401de7cc0666d 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -886,7 +886,7 @@ template <bool MakeCall> class if_architecture_helper {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
                       [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) {
+                         ext::oneapi::experimental::architecture b) constexpr {
                         return a < b;
                       })) {
       fn();
@@ -907,7 +907,7 @@ template <bool MakeCall> class if_architecture_helper {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
                       [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) {
+                         ext::oneapi::experimental::architecture b) constexpr {
                         return a <= b;
                       })) {
       fn();
@@ -927,7 +927,7 @@ template <bool MakeCall> class if_architecture_helper {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
                       [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) {
+                         ext::oneapi::experimental::architecture b) constexpr {
                         return a > b;
                       })) {
       fn();
@@ -948,7 +948,7 @@ template <bool MakeCall> class if_architecture_helper {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
                       [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) {
+                         ext::oneapi::experimental::architecture b) constexpr {
                         return a >= b;
                       })) {
       fn();
@@ -970,12 +970,12 @@ template <bool MakeCall> class if_architecture_helper {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch1>(
                       [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) {
+                         ext::oneapi::experimental::architecture b) constexpr {
                         return a >= b;
                       }) &&
                   sycl::detail::device_architecture_comparison_aot<Arch2>(
                       [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) {
+                         ext::oneapi::experimental::architecture b) constexpr {
                         return a <= b;
                       })) {
       fn();
@@ -1035,7 +1035,9 @@ constexpr static auto if_architecture_is(T fn) {
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_lt(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) { return a < b; })) {
+                    [](architecture a, architecture b) constexpr {
+                      return a < b;
+                    })) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1050,7 +1052,9 @@ constexpr static auto if_architecture_is_lt(T fn) {
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_le(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) { return a <= b; })) {
+                    [](architecture a, architecture b) constexpr {
+                      return a <= b;
+                    })) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1065,7 +1069,9 @@ constexpr static auto if_architecture_is_le(T fn) {
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_gt(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) { return a > b; })) {
+                    [](architecture a, architecture b) constexpr {
+                      return a > b;
+                    })) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1080,7 +1086,9 @@ constexpr static auto if_architecture_is_gt(T fn) {
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_ge(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) { return a >= b; })) {
+                    [](architecture a, architecture b) constexpr {
+                      return a >= b;
+                    })) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1096,9 +1104,13 @@ constexpr static auto if_architecture_is_ge(T fn) {
 template <architecture Arch1, architecture Arch2, typename T>
 constexpr static auto if_architecture_is_between(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch1>(
-                    [](architecture a, architecture b) { return a >= b; }) &&
+                    [](architecture a, architecture b) constexpr {
+                      return a >= b;
+                    }) &&
                 sycl::detail::device_architecture_comparison_aot<Arch2>(
-                    [](architecture a, architecture b) { return a <= b; })) {
+                    [](architecture a, architecture b) constexpr {
+                      return a <= b;
+                    })) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {

From a0862a671da83237bd654ac92e8da62372c8b394 Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Sun, 18 Feb 2024 15:26:44 -0800
Subject: [PATCH 05/11] Remove unused header include

---
 .../include/sycl/ext/oneapi/experimental/device_architecture.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index 401de7cc0666d..719ddf8180cca 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -9,7 +9,6 @@
 #pragma once
 
 #include <optional>
-#include <unordered_map>
 
 namespace sycl {
 inline namespace _V1 {

From ac7bfdad27924d3545181d82545ca227250a5bb2 Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Mon, 19 Feb 2024 00:46:14 -0800
Subject: [PATCH 06/11] =?UTF-8?q?Try=20to=20fix=20'error:=20expression=20?=
 =?UTF-8?q?=E2=80=98<lambda>=E2=80=99=20is=20not=20a=20constant=20expressi?=
 =?UTF-8?q?on'=20again?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../sycl/ext/oneapi/experimental/device_architecture.hpp   | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index 719ddf8180cca..ba13dd9a7ddf1 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -1033,10 +1033,11 @@ constexpr static auto if_architecture_is(T fn) {
 /// family as @tparam Arch and compares less than @tparam Arch.
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_lt(T fn) {
+  constexpr auto compare_op_lt = [](architecture a, architecture b) constexpr {
+    return a < b;
+  };
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) constexpr {
-                      return a < b;
-                    })) {
+                    compare_op_lt)) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {

From 7d6d225264b46e3abe9cccb0025b19272d1180ff Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Mon, 19 Feb 2024 02:31:01 -0800
Subject: [PATCH 07/11] Try to fix again

---
 .../experimental/device_architecture.hpp      | 68 +++++++------------
 1 file changed, 25 insertions(+), 43 deletions(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index ba13dd9a7ddf1..9b326c66f5e9c 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -837,6 +837,19 @@ constexpr static bool device_architecture_comparison_aot(Compare comp) {
   return false;
 }
 
+constexpr auto device_arch_compare_op_lt =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a < b; };
+constexpr auto device_arch_compare_op_le =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a <= b; };
+constexpr auto device_arch_compare_op_gt =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a > b; };
+constexpr auto device_arch_compare_op_ge =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a >= b; };
+
 // Helper object used to implement "else_if_architecture_is",
 // "else_if_architecture_is_*" and "otherwise". The "MakeCall" template
 // parameter tells whether a previous clause in the "if-elseif-elseif ..." chain
@@ -884,10 +897,7 @@ template <bool MakeCall> class if_architecture_helper {
   constexpr auto else_if_architecture_is_lt(T fn) {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
-                      [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) constexpr {
-                        return a < b;
-                      })) {
+                      device_arch_compare_op_lt)) {
       fn();
       return sycl::detail::if_architecture_helper<false>{};
     } else {
@@ -905,10 +915,7 @@ template <bool MakeCall> class if_architecture_helper {
   constexpr auto else_if_architecture_is_le(T fn) {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
-                      [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) constexpr {
-                        return a <= b;
-                      })) {
+                      device_arch_compare_op_le)) {
       fn();
       return sycl::detail::if_architecture_helper<false>{};
     } else {
@@ -925,10 +932,7 @@ template <bool MakeCall> class if_architecture_helper {
   constexpr auto else_if_architecture_is_gt(T fn) {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
-                      [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) constexpr {
-                        return a > b;
-                      })) {
+                      device_arch_compare_op_gt)) {
       fn();
       return sycl::detail::if_architecture_helper<false>{};
     } else {
@@ -946,10 +950,7 @@ template <bool MakeCall> class if_architecture_helper {
   constexpr auto else_if_architecture_is_ge(T fn) {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch>(
-                      [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) constexpr {
-                        return a >= b;
-                      })) {
+                      device_arch_compare_op_ge)) {
       fn();
       return sycl::detail::if_architecture_helper<false>{};
     } else {
@@ -968,15 +969,9 @@ template <bool MakeCall> class if_architecture_helper {
   constexpr auto else_if_architecture_is_between(T fn) {
     if constexpr (MakeCall &&
                   sycl::detail::device_architecture_comparison_aot<Arch1>(
-                      [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) constexpr {
-                        return a >= b;
-                      }) &&
+                      device_arch_compare_op_ge) &&
                   sycl::detail::device_architecture_comparison_aot<Arch2>(
-                      [](ext::oneapi::experimental::architecture a,
-                         ext::oneapi::experimental::architecture b) constexpr {
-                        return a <= b;
-                      })) {
+                      device_arch_compare_op_le)) {
       fn();
       return sycl::detail::if_architecture_helper<false>{};
     } else {
@@ -1033,11 +1028,8 @@ constexpr static auto if_architecture_is(T fn) {
 /// family as @tparam Arch and compares less than @tparam Arch.
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_lt(T fn) {
-  constexpr auto compare_op_lt = [](architecture a, architecture b) constexpr {
-    return a < b;
-  };
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    compare_op_lt)) {
+                    sycl::detail::device_arch_compare_op_lt)) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1052,9 +1044,7 @@ constexpr static auto if_architecture_is_lt(T fn) {
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_le(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) constexpr {
-                      return a <= b;
-                    })) {
+                    sycl::detail::device_arch_compare_op_le)) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1069,9 +1059,7 @@ constexpr static auto if_architecture_is_le(T fn) {
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_gt(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) constexpr {
-                      return a > b;
-                    })) {
+                    sycl::detail::device_arch_compare_op_gt)) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1086,9 +1074,7 @@ constexpr static auto if_architecture_is_gt(T fn) {
 template <architecture Arch, typename T>
 constexpr static auto if_architecture_is_ge(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
-                    [](architecture a, architecture b) constexpr {
-                      return a >= b;
-                    })) {
+                    sycl::detail::device_arch_compare_op_ge)) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
@@ -1104,13 +1090,9 @@ constexpr static auto if_architecture_is_ge(T fn) {
 template <architecture Arch1, architecture Arch2, typename T>
 constexpr static auto if_architecture_is_between(T fn) {
   if constexpr (sycl::detail::device_architecture_comparison_aot<Arch1>(
-                    [](architecture a, architecture b) constexpr {
-                      return a >= b;
-                    }) &&
+                    sycl::detail::device_arch_compare_op_ge) &&
                 sycl::detail::device_architecture_comparison_aot<Arch2>(
-                    [](architecture a, architecture b) constexpr {
-                      return a <= b;
-                    })) {
+                    sycl::detail::device_arch_compare_op_le)) {
     fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {

From efef517fdbb880172e9e2fc4657c3be3a7acf7db Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Mon, 19 Feb 2024 05:01:31 -0800
Subject: [PATCH 08/11] Fix typo in comment

---
 sycl/include/sycl/device.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp
index 2b65e077bd08e..763304f84c3cb 100644
--- a/sycl/include/sycl/device.hpp
+++ b/sycl/include/sycl/device.hpp
@@ -267,7 +267,7 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase<device> {
   /// the function.
   bool ext_oneapi_architecture_is(ext::oneapi::experimental::architecture arch);
 
-  /// Indicates if the SYCL device architecture equals is in the category passed
+  /// Indicates if the SYCL device architecture is in the category passed
   /// to the function.
   ///
   /// \param category is one of the architecture categories from arch_category

From 5a8efb0ca2b8a44dd4bace4d3163415949bda28f Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Wed, 28 Feb 2024 06:37:11 -0800
Subject: [PATCH 09/11] Improve uniqueness of arch ID

---
 .../experimental/device_architecture.hpp      | 214 ++++++++++--------
 1 file changed, 121 insertions(+), 93 deletions(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index 9b326c66f5e9c..69b0afc60e704 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -8,13 +8,14 @@
 
 #pragma once
 
+#include <cstdint> // for uint64_t
 #include <optional>
 
 namespace sycl {
 inline namespace _V1 {
 namespace ext::oneapi::experimental {
 
-enum class architecture {
+enum class architecture : uint64_t {
   // If new element is added to this enum:
   //
   // Update
@@ -28,116 +29,131 @@ enum class architecture {
   //   - the unique ID of the new architecture in SYCL RT source code to support
   //     querying the device architecture
   //
-  x86_64 = 0x00000000,
+  // Important note about keeping architecture IDs below unique:
+  //   - the architecture ID must be a hex number with 16 digits
+  //   - the architecture ID must suit the following template:
+  //     0x AAAA BB CCCCCCCC DD (without spaces), where
+  //       - AAAA is 4-digit PCI vendor ID of that architecture
+  //       - BB is 2-digit unique number of vendor's accelerator. It is 00 by
+  //         default. E.g., for Intel, the PCI vendor ID is the same for all
+  //         accelerators: for CPU and for GPU. In this case BB can be equal to
+  //         00 for CPU and 01 for GPU
+  //       - AAAABB number must be unique to the only one category from
+  //         arch_category enum below. Two or more categories cannot share the
+  //         same AAAABB number
+  //       - CCCCCCCC is 8-digit number of architecture itself. It must be
+  //         unique for all architectures inside the category
+  //       - DD is 2-digit number reserved for future unexpected modifications
+  //         to keep uniqueness. It should be always 00 for now
+  //
+  x86_64 = 0x0000000000000000,
   //
   // Intel CPU architectures
   //
-  // The requirement for the unique ID for intel_cpu_* architectures below is:
-  // - the ID must start with 0x0 (to avoid the integer overflow)
-  // - then goes Intel's vendor ID from underlied backend (which is 8086)
-  // - the ID ends with the architecture ID from the DEVICE_IP_VERSION extension
-  //   of underlied backend
-  intel_cpu_spr = 0x08086008,
-  intel_cpu_gnr = 0x08086009,
+  // AAAA is 8086, BB is 00,
+  // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of
+  // underlied backend,
+  // DD is 00
+  intel_cpu_spr = 0x8086000000000800,
+  intel_cpu_gnr = 0x8086000000000900,
   //
   // Intel GPU architectures
   //
-  // The requirement for the unique ID for intel_gpu_* architectures below is:
-  // - the ID is GMDID of that architecture
-  intel_gpu_bdw = 0x02000000,
-  intel_gpu_skl = 0x02400009,
-  intel_gpu_kbl = 0x02404009,
-  intel_gpu_cfl = 0x02408009,
-  intel_gpu_apl = 0x0240c000,
+  // AAAA is 8086, BB is 01,
+  // CCCCCCCC is GMDID of that architecture,
+  // DD is 00
+  intel_gpu_bdw = 0x8086010200000000,
+  intel_gpu_skl = 0x8086010240000900,
+  intel_gpu_kbl = 0x8086010240400900,
+  intel_gpu_cfl = 0x8086010240800900,
+  intel_gpu_apl = 0x8086010240c00000,
   intel_gpu_bxt = intel_gpu_apl,
-  intel_gpu_glk = 0x02410000,
-  intel_gpu_whl = 0x02414000,
-  intel_gpu_aml = 0x02418000,
-  intel_gpu_cml = 0x0241c000,
-  intel_gpu_icllp = 0x02c00000,
-  intel_gpu_ehl = 0x02c08000,
+  intel_gpu_glk = 0x8086010241000000,
+  intel_gpu_whl = 0x8086010241400000,
+  intel_gpu_aml = 0x8086010241800000,
+  intel_gpu_cml = 0x8086010241c00000,
+  intel_gpu_icllp = 0x80860102c0000000,
+  intel_gpu_ehl = 0x80860102c0800000,
   intel_gpu_jsl = intel_gpu_ehl,
-  intel_gpu_tgllp = 0x03000000,
-  intel_gpu_rkl = 0x03004000,
-  intel_gpu_adl_s = 0x03008000,
+  intel_gpu_tgllp = 0x8086010300000000,
+  intel_gpu_rkl = 0x8086010300400000,
+  intel_gpu_adl_s = 0x8086010300800000,
   intel_gpu_rpl_s = intel_gpu_adl_s,
-  intel_gpu_adl_p = 0x0300c000,
-  intel_gpu_adl_n = 0x03010000,
-  intel_gpu_dg1 = 0x03028000,
-  intel_gpu_acm_g10 = 0x030dc008,
+  intel_gpu_adl_p = 0x8086010300c00000,
+  intel_gpu_adl_n = 0x8086010301000000,
+  intel_gpu_dg1 = 0x8086010302800000,
+  intel_gpu_acm_g10 = 0x808601030dc00800,
   intel_gpu_dg2_g10 = intel_gpu_acm_g10,
-  intel_gpu_acm_g11 = 0x030e0005,
+  intel_gpu_acm_g11 = 0x808601030e000500,
   intel_gpu_dg2_g11 = intel_gpu_acm_g11,
-  intel_gpu_acm_g12 = 0x030e4000,
+  intel_gpu_acm_g12 = 0x808601030e400000,
   intel_gpu_dg2_g12 = intel_gpu_acm_g12,
-  intel_gpu_pvc = 0x030f0007,
-  intel_gpu_pvc_vg = 0x030f4007,
+  intel_gpu_pvc = 0x808601030f000700,
+  intel_gpu_pvc_vg = 0x808601030f400700,
   //
   // NVIDIA architectures
   //
-  // The requirement for the unique ID for nvidia_gpu_* architectures below is:
-  // - the ID must start with NVIDIA's vendor ID from underlied backend (which
-  //   is 0x10de)
-  // - the ID must end with SM version ID of that architecture
-  nvidia_gpu_sm_50 = 0x10de0050,
-  nvidia_gpu_sm_52 = 0x10de0052,
-  nvidia_gpu_sm_53 = 0x10de0053,
-  nvidia_gpu_sm_60 = 0x10de0060,
-  nvidia_gpu_sm_61 = 0x10de0061,
-  nvidia_gpu_sm_62 = 0x10de0062,
-  nvidia_gpu_sm_70 = 0x10de0070,
-  nvidia_gpu_sm_72 = 0x10de0072,
-  nvidia_gpu_sm_75 = 0x10de0075,
-  nvidia_gpu_sm_80 = 0x10de0080,
-  nvidia_gpu_sm_86 = 0x10de0086,
-  nvidia_gpu_sm_87 = 0x10de0087,
-  nvidia_gpu_sm_89 = 0x10de0089,
-  nvidia_gpu_sm_90 = 0x10de0090,
+  // AAAA is 10de, BB is 00,
+  // CCCCCCCC is the SM version ID of that architecture,
+  // DD is 00
+  nvidia_gpu_sm_50 = 0x10de000000005000,
+  nvidia_gpu_sm_52 = 0x10de000000005200,
+  nvidia_gpu_sm_53 = 0x10de000000005300,
+  nvidia_gpu_sm_60 = 0x10de000000006000,
+  nvidia_gpu_sm_61 = 0x10de000000006100,
+  nvidia_gpu_sm_62 = 0x10de000000006200,
+  nvidia_gpu_sm_70 = 0x10de000000007000,
+  nvidia_gpu_sm_72 = 0x10de000000007200,
+  nvidia_gpu_sm_75 = 0x10de000000007500,
+  nvidia_gpu_sm_80 = 0x10de000000008000,
+  nvidia_gpu_sm_86 = 0x10de000000008600,
+  nvidia_gpu_sm_87 = 0x10de000000008700,
+  nvidia_gpu_sm_89 = 0x10de000000008900,
+  nvidia_gpu_sm_90 = 0x10de000000009000,
   //
   // AMD architectures
   //
-  // The requirement for the unique ID for amd_gpu_* architectures below is:
-  // - the ID must start with AMD's vendor ID from underlied backend (which is
-  //   0x1002)
-  // - the ID must end with GFX version ID of that architecture
-  amd_gpu_gfx700 = 0x10020700,
-  amd_gpu_gfx701 = 0x10020701,
-  amd_gpu_gfx702 = 0x10020702,
-  amd_gpu_gfx801 = 0x10020801,
-  amd_gpu_gfx802 = 0x10020802,
-  amd_gpu_gfx803 = 0x10020803,
-  amd_gpu_gfx805 = 0x10020805,
-  amd_gpu_gfx810 = 0x10020810,
-  amd_gpu_gfx900 = 0x10020900,
-  amd_gpu_gfx902 = 0x10020902,
-  amd_gpu_gfx904 = 0x10020904,
-  amd_gpu_gfx906 = 0x10020906,
-  amd_gpu_gfx908 = 0x10020908,
-  amd_gpu_gfx909 = 0x10020909,
-  amd_gpu_gfx90a = 0x1002090a,
-  amd_gpu_gfx90c = 0x1002090c,
-  amd_gpu_gfx940 = 0x10020940,
-  amd_gpu_gfx941 = 0x10020941,
-  amd_gpu_gfx942 = 0x10020942,
-  amd_gpu_gfx1010 = 0x10021010,
-  amd_gpu_gfx1011 = 0x10021011,
-  amd_gpu_gfx1012 = 0x10021012,
-  amd_gpu_gfx1013 = 0x10021013,
-  amd_gpu_gfx1030 = 0x10021030,
-  amd_gpu_gfx1031 = 0x10021031,
-  amd_gpu_gfx1032 = 0x10021032,
-  amd_gpu_gfx1033 = 0x10021033,
-  amd_gpu_gfx1034 = 0x10021034,
-  amd_gpu_gfx1035 = 0x10021035,
-  amd_gpu_gfx1036 = 0x10021036,
-  amd_gpu_gfx1100 = 0x10021100,
-  amd_gpu_gfx1101 = 0x10021101,
-  amd_gpu_gfx1102 = 0x10021102,
-  amd_gpu_gfx1103 = 0x10021103,
-  amd_gpu_gfx1150 = 0x10021150,
-  amd_gpu_gfx1151 = 0x10021151,
-  amd_gpu_gfx1200 = 0x10021200,
-  amd_gpu_gfx1201 = 0x10021201,
+  // AAAA is 1002, BB is 00,
+  // CCCCCCCC is the GFX version ID of that architecture,
+  // DD is 00
+  amd_gpu_gfx700 = 0x1002000000070000,
+  amd_gpu_gfx701 = 0x1002000000070100,
+  amd_gpu_gfx702 = 0x1002000000070200,
+  amd_gpu_gfx801 = 0x1002000000080100,
+  amd_gpu_gfx802 = 0x1002000000080200,
+  amd_gpu_gfx803 = 0x1002000000080300,
+  amd_gpu_gfx805 = 0x1002000000080500,
+  amd_gpu_gfx810 = 0x1002000000081000,
+  amd_gpu_gfx900 = 0x1002000000090000,
+  amd_gpu_gfx902 = 0x1002000000090200,
+  amd_gpu_gfx904 = 0x1002000000090400,
+  amd_gpu_gfx906 = 0x1002000000090600,
+  amd_gpu_gfx908 = 0x1002000000090800,
+  amd_gpu_gfx909 = 0x1002000000090900,
+  amd_gpu_gfx90a = 0x1002000000090a00,
+  amd_gpu_gfx90c = 0x1002000000090c00,
+  amd_gpu_gfx940 = 0x1002000000094000,
+  amd_gpu_gfx941 = 0x1002000000094100,
+  amd_gpu_gfx942 = 0x1002000000094200,
+  amd_gpu_gfx1010 = 0x1002000000101000,
+  amd_gpu_gfx1011 = 0x1002000000101100,
+  amd_gpu_gfx1012 = 0x1002000000101200,
+  amd_gpu_gfx1013 = 0x1002000000101300,
+  amd_gpu_gfx1030 = 0x1002000000103000,
+  amd_gpu_gfx1031 = 0x1002000000103100,
+  amd_gpu_gfx1032 = 0x1002000000103200,
+  amd_gpu_gfx1033 = 0x1002000000103300,
+  amd_gpu_gfx1034 = 0x1002000000103400,
+  amd_gpu_gfx1035 = 0x1002000000103500,
+  amd_gpu_gfx1036 = 0x1002000000103600,
+  amd_gpu_gfx1100 = 0x1002000000110000,
+  amd_gpu_gfx1101 = 0x1002000000110100,
+  amd_gpu_gfx1102 = 0x1002000000110200,
+  amd_gpu_gfx1103 = 0x1002000000110300,
+  amd_gpu_gfx1150 = 0x1002000000115000,
+  amd_gpu_gfx1151 = 0x1002000000115100,
+  amd_gpu_gfx1200 = 0x1002000000120000,
+  amd_gpu_gfx1201 = 0x1002000000120100,
   intel_gpu_8_0_0 = intel_gpu_bdw,
   intel_gpu_9_0_9 = intel_gpu_skl,
   intel_gpu_9_1_9 = intel_gpu_kbl,
@@ -153,6 +169,18 @@ enum class architecture {
 };
 
 enum class arch_category {
+  // If new element is added to this enum:
+  //
+  // Add
+  //   - "detail::min_<new_category>_architecture" variable below
+  //   - "detail::max_<new_category>_architecture" variable below
+  //
+  // Update
+  //   - "detail::get_category_min_architecture()" function below
+  //   - "detail::get_category_max_architecture()" function below
+  //   - "detail::get_device_architecture_category()" function below
+  //   - sycl_ext_oneapi_device_architecture specification doc
+  //
   intel_gpu = 0,
   nvidia_gpu = 1,
   amd_gpu = 2,

From bf270fcaeae5fe406642690353f510d155c60c05 Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Thu, 29 Feb 2024 05:44:44 -0800
Subject: [PATCH 10/11] Simplify architecture IDs

---
 .../experimental/device_architecture.hpp      | 196 +++++++++---------
 1 file changed, 95 insertions(+), 101 deletions(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index 69b0afc60e704..4b99e67e3fea3 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -32,128 +32,122 @@ enum class architecture : uint64_t {
   // Important note about keeping architecture IDs below unique:
   //   - the architecture ID must be a hex number with 16 digits
   //   - the architecture ID must suit the following template:
-  //     0x AAAA BB CCCCCCCC DD (without spaces), where
-  //       - AAAA is 4-digit PCI vendor ID of that architecture
-  //       - BB is 2-digit unique number of vendor's accelerator. It is 00 by
-  //         default. E.g., for Intel, the PCI vendor ID is the same for all
-  //         accelerators: for CPU and for GPU. In this case BB can be equal to
-  //         00 for CPU and 01 for GPU
-  //       - AAAABB number must be unique to the only one category from
-  //         arch_category enum below. Two or more categories cannot share the
-  //         same AAAABB number
-  //       - CCCCCCCC is 8-digit number of architecture itself. It must be
+  //     0x AA BBBBBBBBBBBB CC (without spaces), where
+  //       - AA is 2-digit ID of category which must be unique
+  //       - BBBBBBBBBBBB is 12-digit number of architecture itself. It must be
   //         unique for all architectures inside the category
-  //       - DD is 2-digit number reserved for future unexpected modifications
+  //       - CC is 2-digit number reserved for future unexpected modifications
   //         to keep uniqueness. It should be always 00 for now
   //
-  x86_64 = 0x0000000000000000,
+  x86_64 = 0x9900000000000000,
   //
   // Intel CPU architectures
   //
-  // AAAA is 8086, BB is 00,
-  // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of
+  // AAAA is 03,
+  // BBBBBBBBBBBB is the architecture ID from the DEVICE_IP_VERSION extension of
   // underlied backend,
-  // DD is 00
-  intel_cpu_spr = 0x8086000000000800,
-  intel_cpu_gnr = 0x8086000000000900,
+  // CC is 00
+  intel_cpu_spr = 0x0300000000000800,
+  intel_cpu_gnr = 0x0300000000000900,
   //
   // Intel GPU architectures
   //
-  // AAAA is 8086, BB is 01,
-  // CCCCCCCC is GMDID of that architecture,
-  // DD is 00
-  intel_gpu_bdw = 0x8086010200000000,
-  intel_gpu_skl = 0x8086010240000900,
-  intel_gpu_kbl = 0x8086010240400900,
-  intel_gpu_cfl = 0x8086010240800900,
-  intel_gpu_apl = 0x8086010240c00000,
+  // AA is 00,
+  // BBBBBBBBBBBB is GMDID of that architecture (4 zeros + 8-digit GMDID
+  // itself),
+  // CC is 00
+  intel_gpu_bdw = 0x0000000200000000,
+  intel_gpu_skl = 0x0000000240000900,
+  intel_gpu_kbl = 0x0000000240400900,
+  intel_gpu_cfl = 0x0000000240800900,
+  intel_gpu_apl = 0x0000000240c00000,
   intel_gpu_bxt = intel_gpu_apl,
-  intel_gpu_glk = 0x8086010241000000,
-  intel_gpu_whl = 0x8086010241400000,
-  intel_gpu_aml = 0x8086010241800000,
-  intel_gpu_cml = 0x8086010241c00000,
-  intel_gpu_icllp = 0x80860102c0000000,
-  intel_gpu_ehl = 0x80860102c0800000,
+  intel_gpu_glk = 0x0000000241000000,
+  intel_gpu_whl = 0x0000000241400000,
+  intel_gpu_aml = 0x0000000241800000,
+  intel_gpu_cml = 0x0000000241c00000,
+  intel_gpu_icllp = 0x00000002c0000000,
+  intel_gpu_ehl = 0x00000002c0800000,
   intel_gpu_jsl = intel_gpu_ehl,
-  intel_gpu_tgllp = 0x8086010300000000,
-  intel_gpu_rkl = 0x8086010300400000,
-  intel_gpu_adl_s = 0x8086010300800000,
+  intel_gpu_tgllp = 0x0000000300000000,
+  intel_gpu_rkl = 0x0000000300400000,
+  intel_gpu_adl_s = 0x0000000300800000,
   intel_gpu_rpl_s = intel_gpu_adl_s,
-  intel_gpu_adl_p = 0x8086010300c00000,
-  intel_gpu_adl_n = 0x8086010301000000,
-  intel_gpu_dg1 = 0x8086010302800000,
-  intel_gpu_acm_g10 = 0x808601030dc00800,
+  intel_gpu_adl_p = 0x0000000300c00000,
+  intel_gpu_adl_n = 0x0000000301000000,
+  intel_gpu_dg1 = 0x0000000302800000,
+  intel_gpu_acm_g10 = 0x000000030dc00800,
   intel_gpu_dg2_g10 = intel_gpu_acm_g10,
-  intel_gpu_acm_g11 = 0x808601030e000500,
+  intel_gpu_acm_g11 = 0x000000030e000500,
   intel_gpu_dg2_g11 = intel_gpu_acm_g11,
-  intel_gpu_acm_g12 = 0x808601030e400000,
+  intel_gpu_acm_g12 = 0x000000030e400000,
   intel_gpu_dg2_g12 = intel_gpu_acm_g12,
-  intel_gpu_pvc = 0x808601030f000700,
-  intel_gpu_pvc_vg = 0x808601030f400700,
+  intel_gpu_pvc = 0x000000030f000700,
+  intel_gpu_pvc_vg = 0x000000030f400700,
   //
   // NVIDIA architectures
   //
-  // AAAA is 10de, BB is 00,
-  // CCCCCCCC is the SM version ID of that architecture,
-  // DD is 00
-  nvidia_gpu_sm_50 = 0x10de000000005000,
-  nvidia_gpu_sm_52 = 0x10de000000005200,
-  nvidia_gpu_sm_53 = 0x10de000000005300,
-  nvidia_gpu_sm_60 = 0x10de000000006000,
-  nvidia_gpu_sm_61 = 0x10de000000006100,
-  nvidia_gpu_sm_62 = 0x10de000000006200,
-  nvidia_gpu_sm_70 = 0x10de000000007000,
-  nvidia_gpu_sm_72 = 0x10de000000007200,
-  nvidia_gpu_sm_75 = 0x10de000000007500,
-  nvidia_gpu_sm_80 = 0x10de000000008000,
-  nvidia_gpu_sm_86 = 0x10de000000008600,
-  nvidia_gpu_sm_87 = 0x10de000000008700,
-  nvidia_gpu_sm_89 = 0x10de000000008900,
-  nvidia_gpu_sm_90 = 0x10de000000009000,
+  // AA is 01,
+  // BBBBBBBBBBBB is the SM version ID of that architecture,
+  // CC is 00
+  nvidia_gpu_sm_50 = 0x0100000000005000,
+  nvidia_gpu_sm_52 = 0x0100000000005200,
+  nvidia_gpu_sm_53 = 0x0100000000005300,
+  nvidia_gpu_sm_60 = 0x0100000000006000,
+  nvidia_gpu_sm_61 = 0x0100000000006100,
+  nvidia_gpu_sm_62 = 0x0100000000006200,
+  nvidia_gpu_sm_70 = 0x0100000000007000,
+  nvidia_gpu_sm_72 = 0x0100000000007200,
+  nvidia_gpu_sm_75 = 0x0100000000007500,
+  nvidia_gpu_sm_80 = 0x0100000000008000,
+  nvidia_gpu_sm_86 = 0x0100000000008600,
+  nvidia_gpu_sm_87 = 0x0100000000008700,
+  nvidia_gpu_sm_89 = 0x0100000000008900,
+  nvidia_gpu_sm_90 = 0x0100000000009000,
   //
   // AMD architectures
   //
-  // AAAA is 1002, BB is 00,
-  // CCCCCCCC is the GFX version ID of that architecture,
-  // DD is 00
-  amd_gpu_gfx700 = 0x1002000000070000,
-  amd_gpu_gfx701 = 0x1002000000070100,
-  amd_gpu_gfx702 = 0x1002000000070200,
-  amd_gpu_gfx801 = 0x1002000000080100,
-  amd_gpu_gfx802 = 0x1002000000080200,
-  amd_gpu_gfx803 = 0x1002000000080300,
-  amd_gpu_gfx805 = 0x1002000000080500,
-  amd_gpu_gfx810 = 0x1002000000081000,
-  amd_gpu_gfx900 = 0x1002000000090000,
-  amd_gpu_gfx902 = 0x1002000000090200,
-  amd_gpu_gfx904 = 0x1002000000090400,
-  amd_gpu_gfx906 = 0x1002000000090600,
-  amd_gpu_gfx908 = 0x1002000000090800,
-  amd_gpu_gfx909 = 0x1002000000090900,
-  amd_gpu_gfx90a = 0x1002000000090a00,
-  amd_gpu_gfx90c = 0x1002000000090c00,
-  amd_gpu_gfx940 = 0x1002000000094000,
-  amd_gpu_gfx941 = 0x1002000000094100,
-  amd_gpu_gfx942 = 0x1002000000094200,
-  amd_gpu_gfx1010 = 0x1002000000101000,
-  amd_gpu_gfx1011 = 0x1002000000101100,
-  amd_gpu_gfx1012 = 0x1002000000101200,
-  amd_gpu_gfx1013 = 0x1002000000101300,
-  amd_gpu_gfx1030 = 0x1002000000103000,
-  amd_gpu_gfx1031 = 0x1002000000103100,
-  amd_gpu_gfx1032 = 0x1002000000103200,
-  amd_gpu_gfx1033 = 0x1002000000103300,
-  amd_gpu_gfx1034 = 0x1002000000103400,
-  amd_gpu_gfx1035 = 0x1002000000103500,
-  amd_gpu_gfx1036 = 0x1002000000103600,
-  amd_gpu_gfx1100 = 0x1002000000110000,
-  amd_gpu_gfx1101 = 0x1002000000110100,
-  amd_gpu_gfx1102 = 0x1002000000110200,
-  amd_gpu_gfx1103 = 0x1002000000110300,
-  amd_gpu_gfx1150 = 0x1002000000115000,
-  amd_gpu_gfx1151 = 0x1002000000115100,
-  amd_gpu_gfx1200 = 0x1002000000120000,
-  amd_gpu_gfx1201 = 0x1002000000120100,
+  // AA is 02,
+  // BBBBBBBBBBBB is the GFX version ID of that architecture,
+  // CC is 00
+  amd_gpu_gfx700 = 0x0200000000070000,
+  amd_gpu_gfx701 = 0x0200000000070100,
+  amd_gpu_gfx702 = 0x0200000000070200,
+  amd_gpu_gfx801 = 0x0200000000080100,
+  amd_gpu_gfx802 = 0x0200000000080200,
+  amd_gpu_gfx803 = 0x0200000000080300,
+  amd_gpu_gfx805 = 0x0200000000080500,
+  amd_gpu_gfx810 = 0x0200000000081000,
+  amd_gpu_gfx900 = 0x0200000000090000,
+  amd_gpu_gfx902 = 0x0200000000090200,
+  amd_gpu_gfx904 = 0x0200000000090400,
+  amd_gpu_gfx906 = 0x0200000000090600,
+  amd_gpu_gfx908 = 0x0200000000090800,
+  amd_gpu_gfx909 = 0x0200000000090900,
+  amd_gpu_gfx90a = 0x0200000000090a00,
+  amd_gpu_gfx90c = 0x0200000000090c00,
+  amd_gpu_gfx940 = 0x0200000000094000,
+  amd_gpu_gfx941 = 0x0200000000094100,
+  amd_gpu_gfx942 = 0x0200000000094200,
+  amd_gpu_gfx1010 = 0x0200000000101000,
+  amd_gpu_gfx1011 = 0x0200000000101100,
+  amd_gpu_gfx1012 = 0x0200000000101200,
+  amd_gpu_gfx1013 = 0x0200000000101300,
+  amd_gpu_gfx1030 = 0x0200000000103000,
+  amd_gpu_gfx1031 = 0x0200000000103100,
+  amd_gpu_gfx1032 = 0x0200000000103200,
+  amd_gpu_gfx1033 = 0x0200000000103300,
+  amd_gpu_gfx1034 = 0x0200000000103400,
+  amd_gpu_gfx1035 = 0x0200000000103500,
+  amd_gpu_gfx1036 = 0x0200000000103600,
+  amd_gpu_gfx1100 = 0x0200000000110000,
+  amd_gpu_gfx1101 = 0x0200000000110100,
+  amd_gpu_gfx1102 = 0x0200000000110200,
+  amd_gpu_gfx1103 = 0x0200000000110300,
+  amd_gpu_gfx1150 = 0x0200000000115000,
+  amd_gpu_gfx1151 = 0x0200000000115100,
+  amd_gpu_gfx1200 = 0x0200000000120000,
+  amd_gpu_gfx1201 = 0x0200000000120100,
   intel_gpu_8_0_0 = intel_gpu_bdw,
   intel_gpu_9_0_9 = intel_gpu_skl,
   intel_gpu_9_1_9 = intel_gpu_kbl,
@@ -184,7 +178,7 @@ enum class arch_category {
   intel_gpu = 0,
   nvidia_gpu = 1,
   amd_gpu = 2,
-  // TODO: add intel_cpu
+  // TODO: add intel_cpu = 3,
 };
 
 } // namespace ext::oneapi::experimental

From 28a152fa60bc85ccf8e3237fcb0af422deb30352 Mon Sep 17 00:00:00 2001
From: Dmitry Vodoypanov <dmitry.vodopyanov@intel.com>
Date: Tue, 5 Mar 2024 04:36:43 -0800
Subject: [PATCH 11/11] Apply CR changes

---
 .../experimental/device_architecture.hpp      | 29 +++++++++----------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index 4b99e67e3fea3..fd38003944418 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -32,30 +32,29 @@ enum class architecture : uint64_t {
   // Important note about keeping architecture IDs below unique:
   //   - the architecture ID must be a hex number with 16 digits
   //   - the architecture ID must suit the following template:
-  //     0x AA BBBBBBBBBBBB CC (without spaces), where
-  //       - AA is 2-digit ID of category which must be unique
-  //       - BBBBBBBBBBBB is 12-digit number of architecture itself. It must be
-  //         unique for all architectures inside the category
-  //       - CC is 2-digit number reserved for future unexpected modifications
+  //     0x AA BBBB CCCCCCCC DD (without spaces), where
+  //       - AA is 2-digit ID of the architecture family which must be unique
+  //       - BBBB is 4-digit number reserved for future modifications
+  //         to keep uniqueness. It should be always 0000 for now
+  //       - CCCCCCCC is 8-digit number of architecture itself. It must be
+  //         unique for all architectures inside the family
+  //       - DD is 2-digit number reserved for future unexpected modifications
   //         to keep uniqueness. It should be always 00 for now
   //
   x86_64 = 0x9900000000000000,
   //
   // Intel CPU architectures
   //
-  // AAAA is 03,
-  // BBBBBBBBBBBB is the architecture ID from the DEVICE_IP_VERSION extension of
-  // underlied backend,
-  // CC is 00
+  // AA is 03,
+  // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of
+  // underlied backend
   intel_cpu_spr = 0x0300000000000800,
   intel_cpu_gnr = 0x0300000000000900,
   //
   // Intel GPU architectures
   //
   // AA is 00,
-  // BBBBBBBBBBBB is GMDID of that architecture (4 zeros + 8-digit GMDID
-  // itself),
-  // CC is 00
+  // CCCCCCCC is GMDID of that architecture
   intel_gpu_bdw = 0x0000000200000000,
   intel_gpu_skl = 0x0000000240000900,
   intel_gpu_kbl = 0x0000000240400900,
@@ -88,8 +87,7 @@ enum class architecture : uint64_t {
   // NVIDIA architectures
   //
   // AA is 01,
-  // BBBBBBBBBBBB is the SM version ID of that architecture,
-  // CC is 00
+  // CCCCCCCC is the SM version ID of that architecture
   nvidia_gpu_sm_50 = 0x0100000000005000,
   nvidia_gpu_sm_52 = 0x0100000000005200,
   nvidia_gpu_sm_53 = 0x0100000000005300,
@@ -108,8 +106,7 @@ enum class architecture : uint64_t {
   // AMD architectures
   //
   // AA is 02,
-  // BBBBBBBBBBBB is the GFX version ID of that architecture,
-  // CC is 00
+  // CCCCCCCC is the GFX version ID of that architecture
   amd_gpu_gfx700 = 0x0200000000070000,
   amd_gpu_gfx701 = 0x0200000000070100,
   amd_gpu_gfx702 = 0x0200000000070200,