From 68445467cef311e41e3b4b1398b50a2943db9221 Mon Sep 17 00:00:00 2001
From: Dmitry Vodopyanov
Date: Wed, 6 Mar 2024 13:31:43 +0100
Subject: [PATCH] [SYCL] Implement device arch comparison according to
 sycl_ext_oneapi_device_architecture spec update (#12752)

This patch implements the device AOT and host APIs for device architecture
comparison in accordance with the [spec update](https://github.com/intel/llvm/pull/12259).
---
 sycl/include/sycl/device.hpp                  |  11 +
 .../experimental/device_architecture.hpp      | 969 +++++++++++++-----
 sycl/source/detail/device_impl.hpp            |  13 +
 sycl/source/device.cpp                        |   6 +
 ..._architecture_comparison_on_device_aot.cpp | 199 ++++
 ...device_architecture_comparison_on_host.cpp |  42 +
 sycl/test/abi/sycl_symbols_linux.dump         |   1 +
 sycl/test/abi/sycl_symbols_windows.dump       |   1 +
 8 files changed, 988 insertions(+), 254 deletions(-)
 create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
 create mode 100644 sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp

diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp
index df265e1585e83..f3e1937040dba 100644
--- a/sycl/include/sycl/device.hpp
+++ b/sycl/include/sycl/device.hpp
@@ -282,6 +282,17 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase<device> {
   /// the function.
   bool ext_oneapi_architecture_is(ext::oneapi::experimental::architecture arch);
 
+  /// Indicates if the SYCL device architecture is in the category passed
+  /// to the function.
+  ///
+  /// \param category is one of the architecture categories from arch_category
+  /// enum described in sycl_ext_oneapi_device_architecture specification.
+  ///
+  /// \return true if the SYCL device architecture is in the category passed to
+  /// the function.
+  bool
+  ext_oneapi_architecture_is(ext::oneapi::experimental::arch_category category);
+
   // TODO: Remove this diagnostics when __SYCL_WARN_IMAGE_ASPECT is removed.
 #if defined(__clang__)
 #pragma clang diagnostic pop
diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index cbd76058825d9..fd38003944418 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -8,14 +8,19 @@
 
 #pragma once
 
+#include <cstdint>  // for uint64_t
+#include <optional>
+
 namespace sycl {
 inline namespace _V1 {
 namespace ext::oneapi::experimental {
 
-enum class architecture {
+enum class architecture : uint64_t {
   // If new element is added to this enum:
   //
   // Update
+  //   - "detail::min_<category>_architecture" below if needed
+  //   - "detail::max_<category>_architecture" below if needed
   //   - sycl_ext_oneapi_device_architecture specification doc
   //   - "-fsycl-targets" description in sycl/doc/UsersManual.md
   //
@@ -24,92 +29,122 @@ enum class architecture {
   //   - the unique ID of the new architecture in SYCL RT source code to support
   //     querying the device architecture
   //
-  x86_64,
-  intel_cpu_spr,
-  intel_cpu_gnr,
-  intel_gpu_bdw,
-  intel_gpu_skl,
-  intel_gpu_kbl,
-  intel_gpu_cfl,
-  intel_gpu_apl,
+  // Important note about keeping architecture IDs below unique:
+  //   - the architecture ID must be a hex number with 16 digits
+  //   - the architecture ID must match the following template:
+  //     0x AA BBBB CCCCCCCC DD (without spaces), where
+  //       - AA is the 2-digit ID of the architecture family, which must be
+  //         unique
+  //       - BBBB is a 4-digit number reserved for future modifications
+  //         to keep uniqueness. It should always be 0000 for now
+  //       - CCCCCCCC is an 8-digit number identifying the architecture itself.
+  //         It must be unique for all architectures inside the family
+  //       - DD is a 2-digit number reserved for future unexpected
+  //         modifications to keep uniqueness. It should always be 00 for now
+  //
+  x86_64 = 0x9900000000000000,
+  //
+  // Intel CPU architectures
+  //
+  // AA is 03,
+  // CCCCCCCC is the architecture ID from the DEVICE_IP_VERSION extension of
+  // the underlying backend
+  intel_cpu_spr = 0x0300000000000800,
+  intel_cpu_gnr = 0x0300000000000900,
+  //
+  // Intel GPU architectures
+  //
+  // AA is 00,
+  // CCCCCCCC is the GMD ID of that architecture
+  intel_gpu_bdw = 0x0000000200000000,
+  intel_gpu_skl = 0x0000000240000900,
+  intel_gpu_kbl = 0x0000000240400900,
+  intel_gpu_cfl = 0x0000000240800900,
+  intel_gpu_apl = 0x0000000240c00000,
   intel_gpu_bxt = intel_gpu_apl,
-  intel_gpu_glk,
-  intel_gpu_whl,
-  intel_gpu_aml,
-  intel_gpu_cml,
-  intel_gpu_icllp,
-  intel_gpu_ehl,
+  intel_gpu_glk = 0x0000000241000000,
+  intel_gpu_whl = 0x0000000241400000,
+  intel_gpu_aml = 0x0000000241800000,
+  intel_gpu_cml = 0x0000000241c00000,
+  intel_gpu_icllp = 0x00000002c0000000,
+  intel_gpu_ehl = 0x00000002c0800000,
   intel_gpu_jsl = intel_gpu_ehl,
-  intel_gpu_tgllp,
-  intel_gpu_rkl,
-  intel_gpu_adl_s,
+  intel_gpu_tgllp = 0x0000000300000000,
+  intel_gpu_rkl = 0x0000000300400000,
+  intel_gpu_adl_s = 0x0000000300800000,
   intel_gpu_rpl_s = intel_gpu_adl_s,
-  intel_gpu_adl_p,
-  intel_gpu_adl_n,
-  intel_gpu_dg1,
-  intel_gpu_acm_g10,
+  intel_gpu_adl_p = 0x0000000300c00000,
+  intel_gpu_adl_n = 0x0000000301000000,
+  intel_gpu_dg1 = 0x0000000302800000,
+  intel_gpu_acm_g10 = 0x000000030dc00800,
   intel_gpu_dg2_g10 = intel_gpu_acm_g10,
-  intel_gpu_acm_g11,
+  intel_gpu_acm_g11 = 0x000000030e000500,
   intel_gpu_dg2_g11 = intel_gpu_acm_g11,
-  intel_gpu_acm_g12,
+  intel_gpu_acm_g12 = 0x000000030e400000,
   intel_gpu_dg2_g12 = intel_gpu_acm_g12,
-  intel_gpu_pvc,
-  intel_gpu_pvc_vg,
+  intel_gpu_pvc = 0x000000030f000700,
+  intel_gpu_pvc_vg = 0x000000030f400700,
+  //
   // NVIDIA architectures
-  nvidia_gpu_sm_50,
-  nvidia_gpu_sm_52,
-  nvidia_gpu_sm_53,
-  nvidia_gpu_sm_60,
-  nvidia_gpu_sm_61,
-  nvidia_gpu_sm_62,
-  nvidia_gpu_sm_70,
-  nvidia_gpu_sm_72,
-  nvidia_gpu_sm_75,
-  nvidia_gpu_sm_80,
-  nvidia_gpu_sm_86,
-  nvidia_gpu_sm_87,
-  nvidia_gpu_sm_89,
-  nvidia_gpu_sm_90,
+  //
+  // AA is 01,
+  // CCCCCCCC is the SM version ID of that architecture
+  nvidia_gpu_sm_50 = 0x0100000000005000,
+  nvidia_gpu_sm_52 = 0x0100000000005200,
+  nvidia_gpu_sm_53 = 0x0100000000005300,
+  nvidia_gpu_sm_60 = 0x0100000000006000,
+  nvidia_gpu_sm_61 = 0x0100000000006100,
+  nvidia_gpu_sm_62 = 0x0100000000006200,
+  nvidia_gpu_sm_70 = 0x0100000000007000,
+  nvidia_gpu_sm_72 = 0x0100000000007200,
+  nvidia_gpu_sm_75 = 0x0100000000007500,
+  nvidia_gpu_sm_80 = 0x0100000000008000,
+  nvidia_gpu_sm_86 = 0x0100000000008600,
+  nvidia_gpu_sm_87 = 0x0100000000008700,
+  nvidia_gpu_sm_89 = 0x0100000000008900,
+  nvidia_gpu_sm_90 = 0x0100000000009000,
+  //
   // AMD architectures
-  amd_gpu_gfx700,
-  amd_gpu_gfx701,
-  amd_gpu_gfx702,
-  amd_gpu_gfx801,
-  amd_gpu_gfx802,
-  amd_gpu_gfx803,
-  amd_gpu_gfx805,
-  amd_gpu_gfx810,
-  amd_gpu_gfx900,
-  amd_gpu_gfx902,
-  amd_gpu_gfx904,
-  amd_gpu_gfx906,
-  amd_gpu_gfx908,
-  amd_gpu_gfx909,
-  amd_gpu_gfx90a,
-  amd_gpu_gfx90c,
-  amd_gpu_gfx940,
-  amd_gpu_gfx941,
-  amd_gpu_gfx942,
-  amd_gpu_gfx1010,
-  amd_gpu_gfx1011,
-  amd_gpu_gfx1012,
-  amd_gpu_gfx1013,
-  amd_gpu_gfx1030,
-  amd_gpu_gfx1031,
-  amd_gpu_gfx1032,
-  amd_gpu_gfx1033,
-  amd_gpu_gfx1034,
-  amd_gpu_gfx1035,
-  amd_gpu_gfx1036,
-  amd_gpu_gfx1100,
-  amd_gpu_gfx1101,
-  amd_gpu_gfx1102,
-  amd_gpu_gfx1103,
-  amd_gpu_gfx1150,
-  amd_gpu_gfx1151,
-  amd_gpu_gfx1200,
-  amd_gpu_gfx1201,
-  // Update "detail::max_architecture" below if you add new elements here!
+  //
+  // AA is 02,
+  // CCCCCCCC is the GFX version ID of that architecture
+  amd_gpu_gfx700 = 0x0200000000070000,
+  amd_gpu_gfx701 = 0x0200000000070100,
+  amd_gpu_gfx702 = 0x0200000000070200,
+  amd_gpu_gfx801 = 0x0200000000080100,
+  amd_gpu_gfx802 = 0x0200000000080200,
+  amd_gpu_gfx803 = 0x0200000000080300,
+  amd_gpu_gfx805 = 0x0200000000080500,
+  amd_gpu_gfx810 = 0x0200000000081000,
+  amd_gpu_gfx900 = 0x0200000000090000,
+  amd_gpu_gfx902 = 0x0200000000090200,
+  amd_gpu_gfx904 = 0x0200000000090400,
+  amd_gpu_gfx906 = 0x0200000000090600,
+  amd_gpu_gfx908 = 0x0200000000090800,
+  amd_gpu_gfx909 = 0x0200000000090900,
+  amd_gpu_gfx90a = 0x0200000000090a00,
+  amd_gpu_gfx90c = 0x0200000000090c00,
+  amd_gpu_gfx940 = 0x0200000000094000,
+  amd_gpu_gfx941 = 0x0200000000094100,
+  amd_gpu_gfx942 = 0x0200000000094200,
+  amd_gpu_gfx1010 = 0x0200000000101000,
+  amd_gpu_gfx1011 = 0x0200000000101100,
+  amd_gpu_gfx1012 = 0x0200000000101200,
+  amd_gpu_gfx1013 = 0x0200000000101300,
+  amd_gpu_gfx1030 = 0x0200000000103000,
+  amd_gpu_gfx1031 = 0x0200000000103100,
+  amd_gpu_gfx1032 = 0x0200000000103200,
+  amd_gpu_gfx1033 = 0x0200000000103300,
+  amd_gpu_gfx1034 = 0x0200000000103400,
+  amd_gpu_gfx1035 = 0x0200000000103500,
+  amd_gpu_gfx1036 = 0x0200000000103600,
+  amd_gpu_gfx1100 = 0x0200000000110000,
+  amd_gpu_gfx1101 = 0x0200000000110100,
+  amd_gpu_gfx1102 = 0x0200000000110200,
+  amd_gpu_gfx1103 = 0x0200000000110300,
+  amd_gpu_gfx1150 = 0x0200000000115000,
+  amd_gpu_gfx1151 = 0x0200000000115100,
+  amd_gpu_gfx1200 = 0x0200000000120000,
+  amd_gpu_gfx1201 = 0x0200000000120100,
   intel_gpu_8_0_0 = intel_gpu_bdw,
   intel_gpu_9_0_9 = intel_gpu_skl,
   intel_gpu_9_1_9 = intel_gpu_kbl,
@@ -124,12 +159,49 @@ enum class architecture {
   intel_gpu_12_10_0 = intel_gpu_dg1,
 };
 
+enum class arch_category {
+  // If a new element is added to this enum:
+  //
+  // Add
+  //   - "detail::min_<new_category>_architecture" variable below
+  //   - "detail::max_<new_category>_architecture" variable below
+  //
+  // Update
+  //   - "detail::get_category_min_architecture()" function below
+  //   - "detail::get_category_max_architecture()" function below
+  //   - "detail::get_device_architecture_category()" function below
+  //   - sycl_ext_oneapi_device_architecture specification doc
+  //
+  intel_gpu = 0,
+  nvidia_gpu = 1,
+  amd_gpu = 2,
+  // TODO: add intel_cpu = 3,
+};
+
 } // namespace ext::oneapi::experimental
 
 namespace detail {
 
-static constexpr ext::oneapi::experimental::architecture max_architecture =
-    ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
+static constexpr ext::oneapi::experimental::architecture
+    min_intel_gpu_architecture =
+        ext::oneapi::experimental::architecture::intel_gpu_bdw;
+static constexpr ext::oneapi::experimental::architecture
+    max_intel_gpu_architecture =
+        ext::oneapi::experimental::architecture::intel_gpu_pvc_vg;
+
+static constexpr ext::oneapi::experimental::architecture
+    min_nvidia_gpu_architecture =
+        ext::oneapi::experimental::architecture::nvidia_gpu_sm_50;
+static constexpr ext::oneapi::experimental::architecture
+    max_nvidia_gpu_architecture =
+        ext::oneapi::experimental::architecture::nvidia_gpu_sm_90;
+
+static constexpr ext::oneapi::experimental::architecture
+    min_amd_gpu_architecture =
+        ext::oneapi::experimental::architecture::amd_gpu_gfx700;
+static constexpr ext::oneapi::experimental::architecture
+    max_amd_gpu_architecture =
+        ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
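
For orientation, here is a minimal sketch (not part of the patch; the helper names are hypothetical) of how the "0x AA BBBB CCCCCCCC DD" layout documented in the enum above can be unpacked from an architecture value:

// Illustration only: decode the architecture-ID fields described above.
#include <cstdint>

constexpr uint64_t arch_family(uint64_t id) { return (id >> 56) & 0xFF; }        // AA
constexpr uint64_t arch_reserved_hi(uint64_t id) { return (id >> 40) & 0xFFFF; } // BBBB
constexpr uint64_t arch_number(uint64_t id) { return (id >> 8) & 0xFFFFFFFF; }   // CCCCCCCC
constexpr uint64_t arch_reserved_lo(uint64_t id) { return id & 0xFF; }           // DD

// nvidia_gpu_sm_90 == 0x0100000000009000: family 0x01 (NVIDIA), arch 0x90.
static_assert(arch_family(0x0100000000009000) == 0x01);
static_assert(arch_reserved_hi(0x0100000000009000) == 0x0000);
static_assert(arch_number(0x0100000000009000) == 0x90);
static_assert(arch_reserved_lo(0x0100000000009000) == 0x00);

Because every ID in a family shares the same AA byte and the IDs within a family grow monotonically, a whole category can be tested with the min/max constants above via a simple range check, which is exactly what the helpers added below rely on.
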
 
 #ifndef __SYCL_TARGET_INTEL_X86_64__
 #define __SYCL_TARGET_INTEL_X86_64__ 0
@@ -439,167 +511,247 @@ static constexpr bool is_allowable_aot_mode =
     (__SYCL_TARGET_AMD_GPU_GFX1200__ == 1) ||
     (__SYCL_TARGET_AMD_GPU_GFX1201__ == 1);
 
-struct IsAOTForArchitectureClass {
-  // Allocate an array of size == size of
-  // ext::oneapi::experimental::architecture enum.
-  bool arr[static_cast<int>(max_architecture) + 1];
-
-  using arch = ext::oneapi::experimental::architecture;
-
-  constexpr IsAOTForArchitectureClass() : arr() {
-    arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_bdw)] =
-        __SYCL_TARGET_INTEL_GPU_BDW__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_skl)] =
-        __SYCL_TARGET_INTEL_GPU_SKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_kbl)] =
-        __SYCL_TARGET_INTEL_GPU_KBL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cfl)] =
-        __SYCL_TARGET_INTEL_GPU_CFL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_apl)] =
-        __SYCL_TARGET_INTEL_GPU_APL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_glk)] =
-        __SYCL_TARGET_INTEL_GPU_GLK__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_whl)] =
-        __SYCL_TARGET_INTEL_GPU_WHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_aml)] =
-        __SYCL_TARGET_INTEL_GPU_AML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cml)] =
-        __SYCL_TARGET_INTEL_GPU_CML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_icllp)] =
-        __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_ehl)] =
-        __SYCL_TARGET_INTEL_GPU_EHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_tgllp)] =
-        __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_rkl)] =
-        __SYCL_TARGET_INTEL_GPU_RKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_s)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_p)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_n)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_dg1)] =
-        __SYCL_TARGET_INTEL_GPU_DG1__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g10)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g11)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g12)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_pvc)] =
-        __SYCL_TARGET_INTEL_GPU_PVC__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_pvc_vg)] =
-        __SYCL_TARGET_INTEL_GPU_PVC_VG__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_50)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM50__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_52)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM52__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_53)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM53__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_60)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM60__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_61)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM61__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_62)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM62__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_70)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM70__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_72)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM72__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_75)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM75__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_80)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM80__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_86)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM86__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_87)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM87__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_89)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM89__ == 1;
-    arr[static_cast<int>(arch::nvidia_gpu_sm_90)] =
-        __SYCL_TARGET_NVIDIA_GPU_SM90__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx700)] =
-        __SYCL_TARGET_AMD_GPU_GFX700__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx701)] =
-        __SYCL_TARGET_AMD_GPU_GFX701__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx702)] =
-        __SYCL_TARGET_AMD_GPU_GFX702__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx801)] =
-        __SYCL_TARGET_AMD_GPU_GFX801__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx802)] =
-        __SYCL_TARGET_AMD_GPU_GFX802__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx803)] =
-        __SYCL_TARGET_AMD_GPU_GFX803__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx805)] =
-        __SYCL_TARGET_AMD_GPU_GFX805__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx810)] =
-        __SYCL_TARGET_AMD_GPU_GFX810__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx900)] =
-        __SYCL_TARGET_AMD_GPU_GFX900__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx902)] =
-        __SYCL_TARGET_AMD_GPU_GFX902__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx904)] =
-        __SYCL_TARGET_AMD_GPU_GFX904__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx906)] =
-        __SYCL_TARGET_AMD_GPU_GFX906__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx908)] =
-        __SYCL_TARGET_AMD_GPU_GFX908__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx909)] =
-        __SYCL_TARGET_AMD_GPU_GFX909__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx90a)] =
-        __SYCL_TARGET_AMD_GPU_GFX90A__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx90c)] =
-        __SYCL_TARGET_AMD_GPU_GFX90C__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx940)] =
-        __SYCL_TARGET_AMD_GPU_GFX940__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx941)] =
-        __SYCL_TARGET_AMD_GPU_GFX941__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx942)] =
-        __SYCL_TARGET_AMD_GPU_GFX942__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1010)] =
-        __SYCL_TARGET_AMD_GPU_GFX1010__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1011)] =
-        __SYCL_TARGET_AMD_GPU_GFX1011__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1012)] =
-        __SYCL_TARGET_AMD_GPU_GFX1012__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1030)] =
-        __SYCL_TARGET_AMD_GPU_GFX1030__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1031)] =
-        __SYCL_TARGET_AMD_GPU_GFX1031__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1032)] =
-        __SYCL_TARGET_AMD_GPU_GFX1032__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1033)] =
-        __SYCL_TARGET_AMD_GPU_GFX1033__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1034)] =
-        __SYCL_TARGET_AMD_GPU_GFX1034__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1035)] =
-        __SYCL_TARGET_AMD_GPU_GFX1035__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1036)] =
-        __SYCL_TARGET_AMD_GPU_GFX1036__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1100)] =
-        __SYCL_TARGET_AMD_GPU_GFX1100__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1101)] =
-        __SYCL_TARGET_AMD_GPU_GFX1101__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1102)] =
-        __SYCL_TARGET_AMD_GPU_GFX1102__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1103)] =
-        __SYCL_TARGET_AMD_GPU_GFX1103__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1150)] =
-        __SYCL_TARGET_AMD_GPU_GFX1150__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1151)] =
-        __SYCL_TARGET_AMD_GPU_GFX1151__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1200)] =
-        __SYCL_TARGET_AMD_GPU_GFX1200__ == 1;
-    arr[static_cast<int>(arch::amd_gpu_gfx1201)] =
-        __SYCL_TARGET_AMD_GPU_GFX1201__ == 1;
-  }
-};
+constexpr static std::optional<ext::oneapi::experimental::architecture>
+get_current_architecture_aot() {
+  // TODO: rewrite the logic below once sycl_ext_oneapi_device_architecture
+  // supports more than one target in -fsycl-targets
+#if __SYCL_TARGET_INTEL_X86_64__
+  return ext::oneapi::experimental::architecture::x86_64;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_BDW__
+  return ext::oneapi::experimental::architecture::intel_gpu_bdw;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_SKL__
+  return ext::oneapi::experimental::architecture::intel_gpu_skl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_KBL__
+  return ext::oneapi::experimental::architecture::intel_gpu_kbl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_CFL__
+  return ext::oneapi::experimental::architecture::intel_gpu_cfl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_APL__
+  return ext::oneapi::experimental::architecture::intel_gpu_apl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_GLK__
+  return ext::oneapi::experimental::architecture::intel_gpu_glk;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_WHL__
+  return ext::oneapi::experimental::architecture::intel_gpu_whl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_AML__
+  return ext::oneapi::experimental::architecture::intel_gpu_aml;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_CML__
+  return ext::oneapi::experimental::architecture::intel_gpu_cml;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ICLLP__
+  return ext::oneapi::experimental::architecture::intel_gpu_icllp;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_EHL__
+  return ext::oneapi::experimental::architecture::intel_gpu_ehl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_TGLLP__
+  return ext::oneapi::experimental::architecture::intel_gpu_tgllp;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_RKL__
+  return ext::oneapi::experimental::architecture::intel_gpu_rkl;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_S__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_s;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_P__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_p;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ADL_N__
+  return ext::oneapi::experimental::architecture::intel_gpu_adl_n;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_DG1__
+  return ext::oneapi::experimental::architecture::intel_gpu_dg1;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G10__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g10;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G11__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g11;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_ACM_G12__
+  return ext::oneapi::experimental::architecture::intel_gpu_acm_g12;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_PVC__
+  return ext::oneapi::experimental::architecture::intel_gpu_pvc;
+#endif
+#if __SYCL_TARGET_INTEL_GPU_PVC_VG__
+  return ext::oneapi::experimental::architecture::intel_gpu_pvc_vg;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM50__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_50;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM52__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_52;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM53__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_53;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM60__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_60;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM61__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_61;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM62__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_62;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM70__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_70;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM72__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_72;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM75__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_75;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM80__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_80;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM86__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_86;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM87__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_87;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM89__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_89;
+#endif
+#if __SYCL_TARGET_NVIDIA_GPU_SM90__
+  return ext::oneapi::experimental::architecture::nvidia_gpu_sm_90;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX700__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx700;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX701__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx701;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX702__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx702;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX801__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx801;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX802__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx802;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX803__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx803;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX805__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx805;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX810__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx810;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX900__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx900;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX902__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx902;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX904__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx904;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX906__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx906;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX908__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx908;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX909__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx909;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX90A__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx90a;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX90C__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx90c;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX940__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx940;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX941__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx941;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX942__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx942;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1010__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1010;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1011__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1011;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1012__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1012;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1030__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1030;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1031__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1031;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1032__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1032;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1033__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1033;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1034__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1034;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1035__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1035;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1036__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1036;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1100__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1100;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1101__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1101;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1102__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1102;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1103__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1103;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1150__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1150;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1151__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1151;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1200__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1200;
+#endif
+#if __SYCL_TARGET_AMD_GPU_GFX1201__
+  return ext::oneapi::experimental::architecture::amd_gpu_gfx1201;
+#endif
+  return std::nullopt;
+}
 
-// One entry for each enumerator in "architecture" telling whether the AOT
-// target matches that architecture.
-static constexpr IsAOTForArchitectureClass is_aot_for_architecture;
+// Tells whether the AOT target matches the given architecture.
+constexpr static bool
+is_aot_for_architecture(ext::oneapi::experimental::architecture arch) {
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+  if (current_arch.has_value())
+    return arch == *current_arch;
+  return false;
+}
 
 // Reads the value of "is_allowable_aot_mode" via a template to defer triggering
 // static_assert() until template instantiation time.
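
A reduced sketch of the detection pattern above (illustration only, not part of the patch; the two targets shown are arbitrary). The compiler driver predefines exactly one __SYCL_TARGET_* macro as 1 per AOT compilation, so at most one #if branch survives preprocessing and the function folds to a single return statement; under JIT compilation none is set and std::nullopt is returned:

// Illustration only: the single-target AOT detection pattern, reduced to two
// hypothetical targets. Compiling with -fsycl-targets=intel_gpu_pvc would
// predefine __SYCL_TARGET_INTEL_GPU_PVC__=1, collapsing the whole function to
// "return architecture::intel_gpu_pvc;" at compile time.
#include <optional>

enum class architecture { intel_gpu_pvc, nvidia_gpu_sm_90 };

#ifndef __SYCL_TARGET_INTEL_GPU_PVC__
#define __SYCL_TARGET_INTEL_GPU_PVC__ 0
#endif
#ifndef __SYCL_TARGET_NVIDIA_GPU_SM90__
#define __SYCL_TARGET_NVIDIA_GPU_SM90__ 0
#endif

constexpr std::optional<architecture> current_architecture_aot() {
#if __SYCL_TARGET_INTEL_GPU_PVC__
  return architecture::intel_gpu_pvc;
#endif
#if __SYCL_TARGET_NVIDIA_GPU_SM90__
  return architecture::nvidia_gpu_sm_90;
#endif
  return std::nullopt; // JIT compilation: architecture unknown at compile time
}
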
@@ -612,28 +764,241 @@ constexpr static bool allowable_aot_mode() {
 // pack.
 template <ext::oneapi::experimental::architecture... Archs>
 constexpr static bool device_architecture_is() {
-  return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...);
+  return (is_aot_for_architecture(Archs) || ...);
+}
+
+static constexpr std::optional<ext::oneapi::experimental::architecture>
+get_category_min_architecture(
+    ext::oneapi::experimental::arch_category Category) {
+  if (Category == ext::oneapi::experimental::arch_category::intel_gpu) {
+    return min_intel_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) {
+    return min_nvidia_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) {
+    return min_amd_gpu_architecture;
+  } // add "else if" when adding a new category; "else" is not needed
+  return std::nullopt;
+}
+
+static constexpr std::optional<ext::oneapi::experimental::architecture>
+get_category_max_architecture(
+    ext::oneapi::experimental::arch_category Category) {
+  if (Category == ext::oneapi::experimental::arch_category::intel_gpu) {
+    return max_intel_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::nvidia_gpu) {
+    return max_nvidia_gpu_architecture;
+  } else if (Category == ext::oneapi::experimental::arch_category::amd_gpu) {
+    return max_amd_gpu_architecture;
+  } // add "else if" when adding a new category; "else" is not needed
+  return std::nullopt;
+}
+
+template <ext::oneapi::experimental::arch_category Category>
+constexpr static bool device_architecture_is_in_category_aot() {
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      category_min_arch = get_category_min_architecture(Category);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      category_max_arch = get_category_max_architecture(Category);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+
+  if (category_min_arch.has_value() && category_max_arch.has_value() &&
+      current_arch.has_value())
+    if ((*category_min_arch <= *current_arch) &&
+        (*current_arch <= *category_max_arch))
+      return true;
+
+  return false;
+}
+
+template <ext::oneapi::experimental::arch_category... Categories>
+constexpr static bool device_architecture_is_in_categories() {
+  return (device_architecture_is_in_category_aot<Categories>() || ...);
+}
+
+constexpr static std::optional<ext::oneapi::experimental::arch_category>
+get_device_architecture_category(ext::oneapi::experimental::architecture arch) {
+  auto arch_is_in_segment =
+      [&arch](ext::oneapi::experimental::architecture min,
+              ext::oneapi::experimental::architecture max) {
+        if ((min <= arch) && (arch <= max))
+          return true;
+        return false;
+      };
+
+  if (arch_is_in_segment(min_intel_gpu_architecture,
+                         max_intel_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::intel_gpu;
+  if (arch_is_in_segment(min_nvidia_gpu_architecture,
+                         max_nvidia_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::nvidia_gpu;
+  if (arch_is_in_segment(min_amd_gpu_architecture, max_amd_gpu_architecture))
+    return ext::oneapi::experimental::arch_category::amd_gpu;
+  // add "if" when adding a new category
+
+  return std::nullopt;
+}
+
+template <ext::oneapi::experimental::architecture Arch, typename Compare>
+constexpr static bool device_architecture_comparison_aot(Compare comp) {
+  constexpr std::optional<ext::oneapi::experimental::arch_category>
+      input_arch_category = get_device_architecture_category(Arch);
+  constexpr std::optional<ext::oneapi::experimental::architecture>
+      current_arch = get_current_architecture_aot();
+
+  if (input_arch_category.has_value() && current_arch.has_value()) {
+    std::optional<ext::oneapi::experimental::arch_category>
+        current_arch_category = get_device_architecture_category(*current_arch);
+    if (current_arch_category.has_value() &&
+        (*input_arch_category == *current_arch_category))
+      return comp(*current_arch, Arch);
+  }
+  return false;
 }
 
-// Helper object used to implement "else_if_architecture_is" and "otherwise".
-// The "MakeCall" template parameter tells whether a previous clause in the
-// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some
-// previous clause was true, so none of the subsequent
-// "else_if_architecture_is" or "otherwise" member functions should call the
-// user's function.
+constexpr auto device_arch_compare_op_lt =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a < b; };
+constexpr auto device_arch_compare_op_le =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a <= b; };
+constexpr auto device_arch_compare_op_gt =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a > b; };
+constexpr auto device_arch_compare_op_ge =
+    [](ext::oneapi::experimental::architecture a,
+       ext::oneapi::experimental::architecture b) constexpr { return a >= b; };
+
+// Helper object used to implement "else_if_architecture_is",
+// "else_if_architecture_is_*" and "otherwise". The "MakeCall" template
+// parameter tells whether a previous clause in the "if-elseif-elseif ..." chain
+// was true. When "MakeCall" is false, some previous clause was true, so none
+// of the subsequent "else_if_architecture_is", "else_if_architecture_is_*" or
+// "otherwise" member functions should call the user's function.
 template <bool MakeCall> class if_architecture_helper {
 public:
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is` function has any one of the architectures
+  /// listed in the @tparam Archs parameter pack.
   template <ext::oneapi::experimental::architecture... Archs, typename T>
-  constexpr auto else_if_architecture_is(T fnTrue) {
+  constexpr auto else_if_architecture_is(T fn) {
     if constexpr (MakeCall && device_architecture_is<Archs...>()) {
-      fnTrue();
+      fn();
+      return if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is` function has an architecture that is in any
+  /// one of the categories listed in the @tparam Categories pack.
+  template <ext::oneapi::experimental::arch_category... Categories, typename T>
+  constexpr auto else_if_architecture_is(T fn) {
+    if constexpr (MakeCall &&
+                  device_architecture_is_in_categories<Categories...>()) {
+      fn();
       return if_architecture_helper<false>{};
     } else {
-      (void)fnTrue;
+      (void)fn;
       return if_architecture_helper<MakeCall>{};
     }
   }
 
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_lt` function has an architecture that is in
+  /// the same family as @tparam Arch and compares less than @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_lt(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_lt)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_le` function has an architecture that is in
+  /// the same family as @tparam Arch and compares less than or equal to
+  /// @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_le(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_le)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_gt` function has an architecture that is in
+  /// the same family as @tparam Arch and compares greater than @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_gt(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_gt)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_ge` function has an architecture that is in
+  /// the same family as @tparam Arch and compares greater than or equal to
+  /// @tparam Arch.
+  template <ext::oneapi::experimental::architecture Arch, typename T>
+  constexpr auto else_if_architecture_is_ge(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch>(
+                      device_arch_compare_op_ge)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  /// The condition is `true` only if the object F comes from a previous call
+  /// whose associated condition is `false` *and* if the device which executes
+  /// the `else_if_architecture_is_between` function has an architecture that is
+  /// in the same family as @tparam Arch1 and is greater than or equal to
+  /// @tparam Arch1 and is less than or equal to @tparam Arch2.
+  template <ext::oneapi::experimental::architecture Arch1,
+            ext::oneapi::experimental::architecture Arch2, typename T>
+  constexpr auto else_if_architecture_is_between(T fn) {
+    if constexpr (MakeCall &&
+                  sycl::detail::device_architecture_comparison_aot<Arch1>(
+                      device_arch_compare_op_ge) &&
+                  sycl::detail::device_architecture_comparison_aot<Arch2>(
+                      device_arch_compare_op_le)) {
+      fn();
+      return sycl::detail::if_architecture_helper<false>{};
+    } else {
+      (void)fn;
+      return sycl::detail::if_architecture_helper<MakeCall>{};
+    }
+  }
+
   template <typename T> constexpr void otherwise(T fn) {
     if constexpr (MakeCall) {
       fn();
@@ -644,17 +1009,113 @@ template <bool MakeCall> class if_architecture_helper {
 
 namespace ext::oneapi::experimental {
 
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is` function has any one of the architectures listed in the
+/// @tparam Archs pack.
 template <architecture... Archs, typename T>
-constexpr static auto if_architecture_is(T fnTrue) {
+constexpr static auto if_architecture_is(T fn) {
   static_assert(sycl::detail::allowable_aot_mode<Archs...>(),
                 "The if_architecture_is function may only be used when AOT "
                 "compiling with '-fsycl-targets=spir64_x86_64' or "
                 "'-fsycl-targets=*_gpu_*'");
   if constexpr (sycl::detail::device_architecture_is<Archs...>()) {
-    fnTrue();
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is` function has an architecture that is in any one of the
+/// categories listed in the @tparam Categories pack.
+template <arch_category... Categories, typename T>
+constexpr static auto if_architecture_is(T fn) {
+  if constexpr (sycl::detail::device_architecture_is_in_categories<
+                    Categories...>()) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_lt` function has an architecture that is in the same
+/// family as @tparam Arch and compares less than @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_lt(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_lt)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_le` function has an architecture that is in the same
+/// family as @tparam Arch and compares less than or equal to @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_le(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_le)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_gt` function has an architecture that is in the same
+/// family as @tparam Arch and compares greater than @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_gt(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_gt)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_ge` function has an architecture that is in the same
+/// family as @tparam Arch and compares greater than or equal to @tparam Arch.
+template <architecture Arch, typename T>
+constexpr static auto if_architecture_is_ge(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch>(
+                    sycl::detail::device_arch_compare_op_ge)) {
+    fn();
+    return sycl::detail::if_architecture_helper<false>{};
+  } else {
+    (void)fn;
+    return sycl::detail::if_architecture_helper<true>{};
+  }
+}
+
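
A usage sketch for the chained device-code API defined above (illustration only, not part of the patch; the kernel and the architectures chosen here are arbitrary). Note that the architecture-based overloads require AOT compilation with a matching -fsycl-targets value:

// Illustration only: selecting a code path inside a kernel with the chained
// API defined above.
#include <sycl/sycl.hpp>
namespace syclex = sycl::ext::oneapi::experimental;

void dispatch(sycl::queue &q, int *data) {
  q.single_task([=] {
    syclex::if_architecture_is<syclex::arch_category::intel_gpu>([&] {
      data[0] = 1; // any Intel GPU
    }).else_if_architecture_is_ge<syclex::architecture::nvidia_gpu_sm_80>([&] {
      data[0] = 2; // NVIDIA sm_80 or newer
    }).otherwise([&] {
      data[0] = 3; // fallback path
    });
  });
}

Each clause runs its callable only when no earlier clause matched, which is exactly what the MakeCall template parameter of if_architecture_helper tracks through the chain.
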
+/// The condition is `true` only if the device which executes the
+/// `if_architecture_is_between` function has an architecture that is in the
+/// same family as @tparam Arch1 and is greater than or equal to @tparam
+/// Arch1 and is less than or equal to @tparam Arch2.
+template <architecture Arch1, architecture Arch2, typename T>
+constexpr static auto if_architecture_is_between(T fn) {
+  if constexpr (sycl::detail::device_architecture_comparison_aot<Arch1>(
+                    sycl::detail::device_arch_compare_op_ge) &&
+                sycl::detail::device_architecture_comparison_aot<Arch2>(
+                    sycl::detail::device_arch_compare_op_le)) {
+    fn();
     return sycl::detail::if_architecture_helper<false>{};
   } else {
-    (void)fnTrue;
+    (void)fn;
     return sycl::detail::if_architecture_helper<true>{};
   }
 }
diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp
index f6b4a27b61232..24ff4d20ef03a 100644
--- a/sycl/source/detail/device_impl.hpp
+++ b/sycl/source/detail/device_impl.hpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
 #include
 #include
@@ -238,6 +239,18 @@ class device_impl {
     return Arch == getDeviceArch();
   }
 
+  bool extOneapiArchitectureIs(
+      ext::oneapi::experimental::arch_category Category) const {
+    std::optional<ext::oneapi::experimental::architecture> CategoryMinArch =
+        get_category_min_architecture(Category);
+    std::optional<ext::oneapi::experimental::architecture> CategoryMaxArch =
+        get_category_max_architecture(Category);
+    if (CategoryMinArch.has_value() && CategoryMaxArch.has_value())
+      return CategoryMinArch <= getDeviceArch() &&
+             getDeviceArch() <= CategoryMaxArch;
+    return false;
+  }
+
   /// Gets the current device timestamp
   /// @throw sycl::feature_not_supported if feature is not supported on device
   uint64_t getCurrentDeviceTime();
diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp
index 2bf76a3fa9407..d0753d663da25 100644
--- a/sycl/source/device.cpp
+++ b/sycl/source/device.cpp
@@ -285,10 +285,16 @@ bool device::ext_oneapi_can_access_peer(const device &peer,
   return value == 1;
 }
 
+
 bool device::ext_oneapi_architecture_is(
     ext::oneapi::experimental::architecture arch) {
   return impl->extOneapiArchitectureIs(arch);
 }
 
+bool device::ext_oneapi_architecture_is(
+    ext::oneapi::experimental::arch_category category) {
+  return impl->extOneapiArchitectureIs(category);
+}
+
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
new file mode 100644
index 0000000000000..cb1214395f8e4
--- /dev/null
+++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp
@@ -0,0 +1,199 @@
+// REQUIRES: gpu, gpu-intel-pvc
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc %s -o %t.out
+// RUN: %{run} %t.out
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::oneapi::experimental;
+
+int main() {
+  int *result;
+  int N = 29;
+
+  {
+    queue q(gpu_selector_v);
+
+    result = (int *)malloc_shared(N * sizeof(int), q);
+
+    q.submit([&](handler &cgh) { cgh.fill(result, 0, N); }).wait();
+
+    q.submit([&](handler &cgh) {
+      cgh.single_task([=]() {
+        // test if_architecture_is with category
+        if_architecture_is([&]() {
+          result[0] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is with category
+        if_architecture_is([&]() {}).otherwise(
+            [&]() { result[1] = 1; });
+
+        // test else_if_architecture_is with category - 1
+        if_architecture_is([&]() {})
+            .else_if_architecture_is(
+                [&]() { result[2] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is with category - 2
+        if_architecture_is([&]() {})
+            .else_if_architecture_is([&]() {})
+            .else_if_architecture_is(
+                [&]() { result[3] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_lt
+        if_architecture_is_lt([&]() {
+          result[4] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_lt - 1
+        if_architecture_is_lt([&]() {}).otherwise(
+            [&]() { result[5] = 1; });
+
+        // negative test if_architecture_is_lt - 2
+        if_architecture_is_lt([&]() {
+        }).otherwise([&]() { result[6] = 1; });
+
+        // test else_if_architecture_is_lt - 1
+        if_architecture_is_lt([&]() {})
+            .else_if_architecture_is_lt(
+                [&]() { result[7] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_lt - 2
+        if_architecture_is_lt([&]() {
+        }).else_if_architecture_is_lt([&]() {})
+            .else_if_architecture_is_lt(
+                [&]() { result[8] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_le
+        if_architecture_is_le([&]() {
+          result[9] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_le - 1
+        if_architecture_is_le([&]() {}).otherwise(
+            [&]() { result[10] = 1; });
+
+        // negative test if_architecture_is_le - 2
+        if_architecture_is_le([&]() {
+        }).otherwise([&]() { result[11] = 1; });
+
+        // test else_if_architecture_is_le - 1
+        if_architecture_is_le([&]() {})
+            .else_if_architecture_is_le(
+                [&]() { result[12] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_le - 2
+        if_architecture_is_le([&]() {
+        }).else_if_architecture_is_le([&]() {})
+            .else_if_architecture_is_le(
+                [&]() { result[13] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_gt
+        if_architecture_is_gt([&]() {
+          result[14] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_gt - 1
+        if_architecture_is_gt([&]() {
+        }).otherwise([&]() { result[15] = 1; });
+
+        // negative test if_architecture_is_gt - 2
+        if_architecture_is_gt([&]() {
+        }).otherwise([&]() { result[16] = 1; });
+
+        // test else_if_architecture_is_gt - 1
+        if_architecture_is_gt([&]() {})
+            .else_if_architecture_is_gt(
+                [&]() { result[17] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_gt - 2
+        if_architecture_is_gt([&]() {
+        }).else_if_architecture_is_gt([&]() {})
+            .else_if_architecture_is_gt(
+                [&]() { result[18] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_ge
+        if_architecture_is_ge([&]() {
+          result[19] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_ge - 1
+        if_architecture_is_ge([&]() {
+        }).otherwise([&]() { result[20] = 1; });
+
+        // negative test if_architecture_is_ge - 2
+        if_architecture_is_ge([&]() {
+        }).otherwise([&]() { result[21] = 1; });
+
+        // test else_if_architecture_is_ge - 1
+        if_architecture_is_ge([&]() {})
+            .else_if_architecture_is_ge(
+                [&]() { result[22] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_ge - 2
+        if_architecture_is_ge([&]() {
+        }).else_if_architecture_is_ge([&]() {})
+            .else_if_architecture_is_ge(
+                [&]() { result[23] = 1; })
+            .otherwise([&]() {});
+
+        // test if_architecture_is_between
+        if_architecture_is_between([&]() {
+          result[24] = 1;
+        }).otherwise([&]() {});
+
+        // negative test if_architecture_is_between - 1
+        if_architecture_is_between([&]() {
+        }).otherwise([&]() { result[25] = 1; });
+
+        // negative test if_architecture_is_between - 2
+        if_architecture_is_between([&]() {
+        }).otherwise([&]() { result[26] = 1; });
+
+        // test else_if_architecture_is_between - 1
+        if_architecture_is_between([&]() {})
+            .else_if_architecture_is_between(
+                [&]() { result[27] = 1; })
+            .otherwise([&]() {});
+
+        // test else_if_architecture_is_between - 2
+        if_architecture_is_between([&]() {})
+            .else_if_architecture_is_between(
+                [&]() {})
+            .else_if_architecture_is_between(
+                [&]() { result[28] = 1; })
+            .otherwise([&]() {});
+
+        // if adding a new test here, don't forget to increment the result
+        // index and the value of the N variable
+      });
+    });
+  }
+
+  bool failed = false;
+  for (int i = 0; i < N; i++)
+    if (result[i] != 1) {
+      std::cout << "Verification of the test " << i << " failed." << std::endl;
+      failed = true;
+    }
+
+  return failed;
+}
diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp
new file mode 100644
index 0000000000000..eafbc0f2aadad
--- /dev/null
+++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_host.cpp
@@ -0,0 +1,42 @@
+// REQUIRES: gpu
+
+// This test is written only for Intel architectures. It is expected to fail on
+// NVIDIA and AMD, as the checks for the ext_oneapi_architecture_is host API
+// expect the device architecture to be an Intel GPU.
+// UNSUPPORTED: cuda, hip
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <cassert>
+#include <sycl/sycl.hpp>
+
+namespace syclex = sycl::ext::oneapi::experimental;
+
+int main() {
+  sycl::queue q;
+  sycl::device dev = q.get_device();
+
+  assert(dev.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu));
+  assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::nvidia_gpu));
+  assert(!dev.ext_oneapi_architecture_is(syclex::arch_category::amd_gpu));
+
+  syclex::architecture intel_gpu_arch = syclex::architecture::intel_gpu_ehl;
+  assert(intel_gpu_arch < syclex::architecture::intel_gpu_pvc);
+  assert(intel_gpu_arch <= syclex::architecture::intel_gpu_pvc);
+  assert(intel_gpu_arch > syclex::architecture::intel_gpu_skl);
+  assert(intel_gpu_arch >= syclex::architecture::intel_gpu_ehl);
+
+  syclex::architecture nvidia_gpu_arch = syclex::architecture::nvidia_gpu_sm_70;
+  assert(nvidia_gpu_arch < syclex::architecture::nvidia_gpu_sm_80);
+  assert(nvidia_gpu_arch <= syclex::architecture::nvidia_gpu_sm_80);
+  assert(nvidia_gpu_arch > syclex::architecture::nvidia_gpu_sm_53);
+  assert(nvidia_gpu_arch >= syclex::architecture::nvidia_gpu_sm_70);
+
+  syclex::architecture amd_gpu_arch = syclex::architecture::amd_gpu_gfx908;
+  assert(amd_gpu_arch < syclex::architecture::amd_gpu_gfx1031);
+  assert(amd_gpu_arch <= syclex::architecture::amd_gpu_gfx1031);
+  assert(amd_gpu_arch > syclex::architecture::amd_gpu_gfx810);
+  assert(amd_gpu_arch >= syclex::architecture::amd_gpu_gfx908);
+
+  return 0;
+}
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index 4fe3042f807ab..75aca1a6dda64 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -4058,6 +4058,7 @@ _ZN4sycl3_V16detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_imp
 _ZN4sycl3_V16detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE
 _ZN4sycl3_V16device11get_devicesENS0_4info11device_typeE
 _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental12architectureE
+_ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental13arch_categoryE
 _ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE
 _ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_
 _ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_
diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump
index 435176097e7bd..fa0cc450ce4ce 100644
--- a/sycl/test/abi/sycl_symbols_windows.dump
+++ b/sycl/test/abi/sycl_symbols_windows.dump
@@ -1017,6 +1017,7 @@
 ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z
 ?ext_oneapi_advise_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEBX_KW4_pi_mem_advice@@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z
 ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z
+?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z
 ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z
 ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXXZ
 ?ext_oneapi_can_access_peer@device@_V1@sycl@@QEAA_NAEBV123@W4peer_access@oneapi@ext@23@@Z
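
Finally, a host-side sketch of the two ext_oneapi_architecture_is overloads whose exported symbols are added to the ABI dumps above (illustration only, not part of the patch):

// Illustration only: querying architecture and category on the host.
#include <iostream>
#include <sycl/sycl.hpp>
namespace syclex = sycl::ext::oneapi::experimental;

int main() {
  sycl::device d{sycl::default_selector_v};
  // Category query, added by this patch.
  if (d.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu))
    std::cout << "device is some Intel GPU\n";
  // Exact-architecture query, present before this patch.
  if (d.ext_oneapi_architecture_is(syclex::architecture::intel_gpu_pvc))
    std::cout << "device is PVC specifically\n";
  return 0;
}
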