From 1c9d479057016cf0d13cfc9fc8d6617c1b0e829a Mon Sep 17 00:00:00 2001
From: "Klochkov, Vyacheslav N" <vyacheslav.n.klochkov@intel.com>
Date: Wed, 28 Feb 2024 20:28:43 -0800
Subject: [PATCH] [ESIMD][NFC] Rename L3 cache hint parameters to L2 to avoid
 confuses

The experimental memory API used L3 to name the cache that always was L2
due to some historical reasons.
The new non-experimental API names the same cache level as L2.

This patch just renames the old L3 params to L2 to unify the naming in
experimental and non-experimental namespaces and avoid the mess in
cache names.

Signed-off-by: Klochkov, Vyacheslav N <vyacheslav.n.klochkov@intel.com>
---
 .../esimd/detail/memory_intrin.hpp            |  20 +-
 .../ext/intel/experimental/esimd/memory.hpp   | 360 +++++++++---------
 .../ESIMD/lsc/Inputs/lsc_load_prefetch_2d.hpp |   6 +-
 .../ESIMD/lsc/Inputs/lsc_store_2d.hpp         |   4 +-
 .../ESIMD/lsc/Inputs/lsc_surf_load.hpp        |  16 +-
 .../ESIMD/lsc/Inputs/lsc_surf_store.hpp       |   8 +-
 .../Inputs/lsc_usm_block_load_prefetch.hpp    |  78 ++--
 .../lsc/Inputs/lsc_usm_gather_prefetch.hpp    |  30 +-
 .../ESIMD/lsc/Inputs/lsc_usm_store.hpp        |   8 +-
 .../ESIMD/lsc/lsc_prefetch_2d_u16.cpp         |  18 +-
 .../ESIMD/lsc/lsc_prefetch_2d_u32.cpp         |  16 +-
 .../ESIMD/lsc/lsc_prefetch_2d_u64.cpp         |  16 +-
 .../test-e2e/ESIMD/lsc/lsc_prefetch_2d_u8.cpp |  18 +-
 .../ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp    |  14 +-
 .../ESIMD/lsc/lsc_surf_prefetch_u32.cpp       |  20 +-
 .../ESIMD/lsc/lsc_surf_prefetch_u64.cpp       |  20 +-
 .../ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp     |  14 +-
 sycl/test/esimd/check_lsc.cpp                 |   8 +-
 18 files changed, 337 insertions(+), 337 deletions(-)

diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp
index 2a8606d61cde6..4863486841955 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp
@@ -69,7 +69,7 @@ __ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
 ///
 /// @tparam Ty is element type.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AddressScale is the address scale.
 /// @tparam ImmOffset is the immediate offset added to each address.
 /// @tparam DS is the data size.
@@ -80,7 +80,7 @@ __ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
 /// @param pred is predicates.
 /// @param offsets is the zero-based offsets in bytes.
 /// @param surf_ind is the surface index.
-template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L2H,
           uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
           __ESIMD_EDNS::lsc_vector_size VS,
           __ESIMD_EDNS::lsc_data_order _Transposed, int N,
@@ -97,7 +97,7 @@ __esimd_lsc_prefetch_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
 ///
 /// @tparam Ty is element type.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AddressScale is the address scale.
 /// @tparam ImmOffset is the immediate offset added to each address.
 /// @tparam DS is the data size.
@@ -106,7 +106,7 @@ __esimd_lsc_prefetch_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
 /// @tparam N is the SIMD size of operation (the number of addresses to access)
 /// @param pred is predicates.
 /// @param addrs is the prefetch addresses.
-template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L2H,
           uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
           __ESIMD_EDNS::lsc_vector_size VS,
           __ESIMD_EDNS::lsc_data_order _Transposed, int N>
@@ -122,7 +122,7 @@ __ESIMD_INTRIN void __esimd_lsc_prefetch_stateless(
 ///
 /// @tparam Ty is element type.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam DS is the data size.
 /// @tparam Transposed is the transposed version or not.
 /// @tparam NBlocks is the number of blocks.
@@ -144,7 +144,7 @@ __ESIMD_INTRIN void __esimd_lsc_prefetch_stateless(
 ///  otherwise,
 ///  N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) *
 ///   getNextPowerOf2(BlockWidth) * NBlocks
-template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L2H,
           __ESIMD_ENS::lsc_data_size DS,
           __ESIMD_EDNS::lsc_data_order _Transposed, uint8_t NBlocks,
           int BlockWidth, int BlockHeight, bool Transformed, int N>
@@ -160,7 +160,7 @@ __esimd_lsc_load2d_stateless(__ESIMD_DNS::simd_mask_storage_t<N> Pred,
 ///
 /// @tparam Ty is element type.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam DS is the data size.
 /// @tparam NBlocks is the number of blocks.
 /// @tparam Transposed is the transposed version or not.
@@ -177,7 +177,7 @@ __esimd_lsc_load2d_stateless(__ESIMD_DNS::simd_mask_storage_t<N> Pred,
 /// number of elements.
 /// @param Y is zero based Y-coordinate of the left upper rectangle corner in
 /// rows.
-template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L2H,
           __ESIMD_ENS::lsc_data_size DS,
           __ESIMD_EDNS::lsc_data_order _Transposed, uint8_t NBlocks,
           int BlockWidth, int BlockHeight, bool Transformed, int N>
@@ -192,7 +192,7 @@ __ESIMD_INTRIN void __esimd_lsc_prefetch2d_stateless(
 ///
 /// @tparam Ty is element type.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam DS is the data size.
 /// @tparam Transposed is the transposed version or not.
 /// @tparam NBlocks is the number of blocks.
@@ -214,7 +214,7 @@ __ESIMD_INTRIN void __esimd_lsc_prefetch2d_stateless(
 ///  otherwise,
 ///  N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) *
 ///   getNextPowerOf2(BlockWidth) * NBlocks
-template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L2H,
           __ESIMD_ENS::lsc_data_size DS,
           __ESIMD_EDNS::lsc_data_order _Transposed, uint8_t NBlocks,
           int BlockWidth, int BlockHeight, bool Transformed, int N>
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
index 26939894f2efe..2ab4c798d1e06 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
@@ -481,55 +481,55 @@ template <typename T> constexpr uint32_t get_lsc_data_size() {
   }
 }
 
-template <cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
+template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
 constexpr uint32_t get_lsc_load_cache_mask() {
   if constexpr (L1H == cache_hint::read_invalidate &&
-                L3H == cache_hint::cached) {
+                L2H == cache_hint::cached) {
     return 7;
   }
-  if constexpr (L1H == cache_hint::streaming && L3H == cache_hint::cached) {
+  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
     return 6;
   }
-  if constexpr (L1H == cache_hint::streaming && L3H == cache_hint::uncached) {
+  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
     return 5;
   }
-  if constexpr (L1H == cache_hint::cached && L3H == cache_hint::cached) {
+  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::cached) {
     return 4;
   }
-  if constexpr (L1H == cache_hint::cached && L3H == cache_hint::uncached) {
+  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::uncached) {
     return 3;
   }
-  if constexpr (L1H == cache_hint::uncached && L3H == cache_hint::cached) {
+  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
     return 2;
   }
-  if constexpr (L1H == cache_hint::uncached && L3H == cache_hint::uncached) {
+  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
     return 1;
   }
   return 0;
 }
 
-template <cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
+template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
 constexpr uint32_t get_lsc_store_cache_mask() {
-  if constexpr (L1H == cache_hint::write_back && L3H == cache_hint::cached) {
+  if constexpr (L1H == cache_hint::write_back && L2H == cache_hint::cached) {
     return 7;
   }
-  if constexpr (L1H == cache_hint::streaming && L3H == cache_hint::cached) {
+  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
     return 6;
   }
-  if constexpr (L1H == cache_hint::streaming && L3H == cache_hint::uncached) {
+  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
     return 5;
   }
-  if constexpr (L1H == cache_hint::write_through && L3H == cache_hint::cached) {
+  if constexpr (L1H == cache_hint::write_through && L2H == cache_hint::cached) {
     return 4;
   }
   if constexpr (L1H == cache_hint::write_through &&
-                L3H == cache_hint::uncached) {
+                L2H == cache_hint::uncached) {
     return 3;
   }
-  if constexpr (L1H == cache_hint::uncached && L3H == cache_hint::cached) {
+  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
     return 2;
   }
-  if constexpr (L1H == cache_hint::uncached && L3H == cache_hint::uncached) {
+  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
     return 1;
   }
   return 0;
@@ -647,7 +647,7 @@ lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @param p is the base pointer.
 /// @param offsets is the zero-based offsets in bytes.
@@ -656,12 +656,12 @@ lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
            __ESIMD_NS::simd_mask<N> pred = 1) {
-  return __ESIMD_DNS::gather_impl<T, NElts, DS, L1H, L3H>(p, offsets, pred);
+  return __ESIMD_DNS::gather_impl<T, NElts, DS, L1H, L2H>(p, offsets, pred);
 }
 
 /// USM pointer gather.
@@ -675,7 +675,7 @@ lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @param p is the base pointer.
 /// @param offsets is the zero-based offsets in bytes.
@@ -686,58 +686,58 @@ lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
            __ESIMD_NS::simd_mask<N> pred,
            __ESIMD_NS::simd<T, N * NElts> pass_thru) {
-  return __ESIMD_DNS::gather_impl<T, NElts, DS, L1H, L3H>(p, offsets, pred,
+  return __ESIMD_DNS::gather_impl<T, NElts, DS, L1H, L2H>(p, offsets, pred,
                                                           pass_thru);
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename OffsetObjT, typename RegionTy>
 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
            __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_gather<T, NElts, DS, L1H, L3H, N>(p, offsets.read(), pred);
+  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename OffsetObjT, typename RegionTy>
 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
            __ESIMD_NS::simd_mask<N> pred,
            __ESIMD_NS::simd<T, N * NElts> pass_thru) {
-  return lsc_gather<T, NElts, DS, L1H, L3H, N>(p, offsets.read(), pred,
+  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred,
                                                pass_thru);
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
                              __ESIMD_NS::simd<T, N * NElts>>
 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_gather<T, NElts, DS, L1H, L3H, N>(
+  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
       p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
                              __ESIMD_NS::simd<T, N * NElts>>
 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
            __ESIMD_NS::simd<T, N * NElts> pass_thru) {
-  return lsc_gather<T, NElts, DS, L1H, L3H, N>(
+  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
       p, __ESIMD_NS::simd<Toffset, N>(offset), pred, pass_thru);
 }
 
@@ -752,7 +752,7 @@ lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
@@ -762,7 +762,7 @@ lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy>
 __ESIMD_API
     std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
@@ -772,11 +772,11 @@ __ESIMD_API
                __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
                __ESIMD_NS::simd_mask<N> pred = 1) {
 #ifdef __ESIMD_FORCE_STATELESS_MEM
-  return lsc_gather<T, NElts, DS, L1H, L3H>(
+  return lsc_gather<T, NElts, DS, L1H, L2H>(
       reinterpret_cast<T *>(acc.get_pointer().get()), offsets, pred);
 #else
   __ESIMD_NS::simd<T, N * NElts> PassThru; // Intentionally unitialized.
-  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, L1H, L3H, DS>(
+  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, L1H, L2H, DS>(
       acc, offsets, pred, PassThru);
 #endif // __ESIMD_FORCE_STATELESS_MEM
 }
@@ -784,7 +784,7 @@ __ESIMD_API
 #ifdef __ESIMD_FORCE_STATELESS_MEM
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy, typename Toffset>
 __ESIMD_API std::enable_if_t<
     __ESIMD_DNS::is_device_accessor_with_v<
@@ -793,14 +793,14 @@ __ESIMD_API std::enable_if_t<
     __ESIMD_NS::simd<T, N * NElts>>
 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
            __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_gather<T, NElts, DS, L1H, L3H, N, AccessorTy>(
+  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
       acc, convert<uint64_t>(offsets), pred);
 }
 #endif
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy>
 __ESIMD_API
     std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
@@ -823,7 +823,7 @@ __ESIMD_API
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
@@ -835,7 +835,7 @@ __ESIMD_API
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy>
 __ESIMD_API
     std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
@@ -846,11 +846,11 @@ __ESIMD_API
                __ESIMD_NS::simd_mask<N> pred,
                __ESIMD_NS::simd<T, N * NElts> pass_thru) {
 #ifdef __ESIMD_FORCE_STATELESS_MEM
-  return lsc_gather<T, NElts, DS, L1H, L3H>(
+  return lsc_gather<T, NElts, DS, L1H, L2H>(
       reinterpret_cast<T *>(acc.get_pointer().get()), offsets, pred, pass_thru);
 
 #else
-  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, L1H, L3H, DS>(
+  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, L1H, L2H, DS>(
       acc, offsets, pred, pass_thru);
 #endif // __ESIMD_FORCE_STATELESS_MEM
 }
@@ -858,7 +858,7 @@ __ESIMD_API
 #ifdef __ESIMD_FORCE_STATELESS_MEM
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy, typename Toffset>
 __ESIMD_API std::enable_if_t<
     __ESIMD_DNS::is_device_accessor_with_v<
@@ -868,14 +868,14 @@ __ESIMD_API std::enable_if_t<
 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
            __ESIMD_NS::simd_mask<N> pred,
            __ESIMD_NS::simd<T, N * NElts> pass_thru) {
-  return lsc_gather<T, NElts, DS, L1H, L3H, N, AccessorTy>(
+  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
       acc, convert<uint64_t>(offsets), pred, pass_thru);
 }
 #endif
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy>
 __ESIMD_API std::enable_if_t<
     sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
@@ -912,7 +912,7 @@ lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param pred is operation predicate. Zero means operation is skipped
 /// entirely, non-zero - operation is performed. The default is '1' -
@@ -923,13 +923,13 @@ lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 /// are undefined.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
                              __ESIMD_NS::simd<T, NElts>>
 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred = 1,
                FlagsT flags = FlagsT{}) {
-  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L3H>(p, pred, flags);
+  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L2H>(p, pred, flags);
 }
 
 /// A variation of lsc_block_load without predicate parameter to simplify use
@@ -953,7 +953,7 @@ lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred = 1,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param flags is the alignment specifier type tag.
 /// @return is a vector of type T and size NElts. The elements of the
@@ -961,12 +961,12 @@ lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred = 1,
 /// are undefined.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
                              __ESIMD_NS::simd<T, NElts>>
 lsc_block_load(const T *p, FlagsT flags) {
-  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L3H>(
+  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L2H>(
       p, __ESIMD_NS::simd_mask<1>(1), flags);
 }
 
@@ -992,7 +992,7 @@ lsc_block_load(const T *p, FlagsT flags) {
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param pred is operation predicate. Zero means operation is skipped
 /// entirely, non-zero - operation is performed.
@@ -1002,13 +1002,13 @@ lsc_block_load(const T *p, FlagsT flags) {
 /// @return is a vector of type T and size NElts.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
                              __ESIMD_NS::simd<T, NElts>>
 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred,
                __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT flags = FlagsT{}) {
-  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L3H>(p, pred, pass_thru,
+  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L2H>(p, pred, pass_thru,
                                                           flags);
 }
 
@@ -1032,7 +1032,7 @@ lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offset is the zero-based offset in bytes.
@@ -1044,7 +1044,7 @@ lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred,
 /// vector for which the corresponding element in \p pred is 0 are undefined.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1054,12 +1054,12 @@ __ESIMD_API std::enable_if_t<
     __ESIMD_NS::simd<T, NElts>>
 lsc_block_load(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset,
                __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
-  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L3H>(acc, offset, pred,
+  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L2H>(acc, offset, pred,
                                                           flags);
 }
 
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1092,7 +1092,7 @@ lsc_block_load(AccessorTy acc, uint32_t offset,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offset is the zero-based offset in bytes.
@@ -1101,7 +1101,7 @@ lsc_block_load(AccessorTy acc, uint32_t offset,
 /// vector for which the corresponding element in \p pred is 0 are undefined.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1111,12 +1111,12 @@ __ESIMD_API std::enable_if_t<
     __ESIMD_NS::simd<T, NElts>>
 lsc_block_load(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset,
                FlagsT flags) {
-  return lsc_block_load<T, NElts, DS, L1H, L3H>(
+  return lsc_block_load<T, NElts, DS, L1H, L2H>(
       acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
 }
 
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1125,7 +1125,7 @@ __ESIMD_API std::enable_if_t<
         __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
     __ESIMD_NS::simd<T, NElts>>
 lsc_block_load(AccessorTy acc, uint32_t offset, FlagsT flags) {
-  return lsc_block_load<T, NElts, DS, L1H, L3H>(
+  return lsc_block_load<T, NElts, DS, L1H, L2H>(
       acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
 }
 
@@ -1149,7 +1149,7 @@ lsc_block_load(AccessorTy acc, uint32_t offset, FlagsT flags) {
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offset is the zero-based offset in bytes.
@@ -1162,7 +1162,7 @@ lsc_block_load(AccessorTy acc, uint32_t offset, FlagsT flags) {
 /// @return is a vector of type T and size NElts
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1173,12 +1173,12 @@ __ESIMD_API std::enable_if_t<
 lsc_block_load(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset,
                __ESIMD_NS::simd_mask<1> pred,
                __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT flags = FlagsT{}) {
-  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L3H>(acc, offset, pred,
+  return __ESIMD_DNS::block_load_impl<T, NElts, L1H, L2H>(acc, offset, pred,
                                                           pass_thru, flags);
 }
 
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1202,7 +1202,7 @@ lsc_block_load(AccessorTy acc, uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @param p is the base pointer.
 /// @param offsets is the zero-based offsets in bytes.
@@ -1210,14 +1210,14 @@ lsc_block_load(AccessorTy acc, uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API void lsc_prefetch(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
                               __ESIMD_NS::simd_mask<N> pred = 1) {
   static_assert(std::is_integral_v<Toffset>, "Unsupported offset type");
   detail::check_lsc_vector_size<NElts>();
   detail::check_lsc_data_size<T, DS>();
-  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
   constexpr lsc_data_size _DS =
@@ -1228,28 +1228,28 @@ __ESIMD_API void lsc_prefetch(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
   using MsgT = typename detail::lsc_expand_type<T>::type;
   __ESIMD_NS::simd<uintptr_t, N> addrs = reinterpret_cast<uintptr_t>(p);
   addrs += convert<uintptr_t>(offsets);
-  __esimd_lsc_prefetch_stateless<MsgT, L1H, L3H, _AddressScale, _ImmOffset, _DS,
+  __esimd_lsc_prefetch_stateless<MsgT, L1H, L2H, _AddressScale, _ImmOffset, _DS,
                                  _VS, _Transposed, N>(pred.data(),
                                                       addrs.data());
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename OffsetObjT, typename RegionTy>
 __ESIMD_API void
 lsc_prefetch(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
              __ESIMD_NS::simd_mask<N> pred = 1) {
-  lsc_prefetch<T, NElts, DS, L1H, L3H, N>(p, offsets.read(), pred);
+  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>>
 lsc_prefetch(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
-  lsc_prefetch<T, NElts, DS, L1H, L3H, N>(
+  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(
       p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
 }
 
@@ -1263,16 +1263,16 @@ lsc_prefetch(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
 __ESIMD_API void lsc_prefetch(const T *p) {
   detail::check_lsc_vector_size<NElts>();
   detail::check_lsc_data_size<T, DS>();
-  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
   constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
@@ -1287,7 +1287,7 @@ __ESIMD_API void lsc_prefetch(const T *p) {
   __ESIMD_NS::simd_mask<N> pred = 1;
 
   __ESIMD_NS::simd<uintptr_t, N> addrs = reinterpret_cast<uintptr_t>(p);
-  __esimd_lsc_prefetch_stateless<T, L1H, L3H, _AddressScale, _ImmOffset, _DS,
+  __esimd_lsc_prefetch_stateless<T, L1H, L2H, _AddressScale, _ImmOffset, _DS,
                                  _VS, _Transposed, N>(pred.data(),
                                                       addrs.data());
 }
@@ -1302,7 +1302,7 @@ __ESIMD_API void lsc_prefetch(const T *p) {
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
@@ -1311,7 +1311,7 @@ __ESIMD_API void lsc_prefetch(const T *p) {
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
     AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
@@ -1323,12 +1323,12 @@ lsc_prefetch(AccessorTy acc,
 #endif
              __ESIMD_NS::simd_mask<N> pred = 1) {
 #ifdef __ESIMD_FORCE_STATELESS_MEM
-  return lsc_prefetch<T, NElts, DS, L1H, L3H>(
+  return lsc_prefetch<T, NElts, DS, L1H, L2H>(
       __ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred);
 #else
   detail::check_lsc_vector_size<NElts>();
   detail::check_lsc_data_size<T, DS>();
-  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
   constexpr lsc_data_size _DS =
@@ -1338,7 +1338,7 @@ lsc_prefetch(AccessorTy acc,
       detail::lsc_data_order::nontranspose;
   using MsgT = typename detail::lsc_expand_type<T>::type;
   auto si = __ESIMD_NS::get_surface_index(acc);
-  __esimd_lsc_prefetch_bti<MsgT, L1H, L3H, _AddressScale, _ImmOffset, _DS, _VS,
+  __esimd_lsc_prefetch_bti<MsgT, L1H, L2H, _AddressScale, _ImmOffset, _DS, _VS,
                            _Transposed, N>(pred.data(), offsets.data(), si);
 #endif
 }
@@ -1346,7 +1346,7 @@ lsc_prefetch(AccessorTy acc,
 #ifdef __ESIMD_FORCE_STATELESS_MEM
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy, typename Toffset>
 __ESIMD_API std::enable_if_t<
     __ESIMD_DNS::is_device_accessor_with_v<
@@ -1354,7 +1354,7 @@ __ESIMD_API std::enable_if_t<
     std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
 lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
              __ESIMD_NS::simd_mask<N> pred = 1) {
-  lsc_prefetch<T, NElts, DS, L1H, L3H, N, AccessorTy>(
+  lsc_prefetch<T, NElts, DS, L1H, L2H, N, AccessorTy>(
       acc, convert<uint64_t>(offsets), pred);
 }
 #endif
@@ -1369,25 +1369,25 @@ lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
 /// @tparam NElts is the number of elements to load per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offset is the zero-based offset in bytes.
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
     AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
 lsc_prefetch(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset) {
 #ifdef __ESIMD_FORCE_STATELESS_MEM
-  lsc_prefetch<T, NElts, DS, L1H, L3H>(
+  lsc_prefetch<T, NElts, DS, L1H, L2H>(
       __ESIMD_DNS::accessorToPointer<T>(acc, offset));
 #else
   detail::check_lsc_vector_size<NElts>();
   detail::check_lsc_data_size<T, DS>();
-  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
   constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
@@ -1401,7 +1401,7 @@ lsc_prefetch(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset) {
   __ESIMD_NS::simd_mask<N> pred = 1;
   __ESIMD_NS::simd<uint32_t, N> offsets = offset;
   auto si = __ESIMD_NS::get_surface_index(acc);
-  __esimd_lsc_prefetch_bti<T, L1H, L3H, _AddressScale, _ImmOffset, _DS, _VS,
+  __esimd_lsc_prefetch_bti<T, L1H, L2H, _AddressScale, _ImmOffset, _DS, _VS,
                            _Transposed, N>(pred.data(), offsets.data(), si);
 #endif
 }
@@ -1460,7 +1460,7 @@ __ESIMD_API void lsc_slm_block_store(uint32_t offset,
 /// @tparam NElts is the number of elements to store per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @param p is the base pointer.
 /// @param offsets is the zero-based offsets in bytes.
@@ -1469,34 +1469,34 @@ __ESIMD_API void lsc_slm_block_store(uint32_t offset,
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API void lsc_scatter(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
                              __ESIMD_NS::simd<T, N * NElts> vals,
                              __ESIMD_NS::simd_mask<N> pred = 1) {
-  __ESIMD_DNS::scatter_impl<T, NElts, DS, L1H, L3H, N, Toffset>(p, offsets,
+  __ESIMD_DNS::scatter_impl<T, NElts, DS, L1H, L2H, N, Toffset>(p, offsets,
                                                                 vals, pred);
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename OffsetObjT, typename RegionTy>
 __ESIMD_API void
 lsc_scatter(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
             __ESIMD_NS::simd<T, N * NElts> vals,
             __ESIMD_NS::simd_mask<N> pred = 1) {
-  lsc_scatter<T, NElts, DS, L1H, L3H, N>(p, offsets.read(), vals, pred);
+  lsc_scatter<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), vals, pred);
 }
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename Toffset>
 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
 lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
             __ESIMD_NS::simd_mask<N> pred = 1) {
-  lsc_scatter<T, NElts, DS, L1H, L3H, N>(
+  lsc_scatter<T, NElts, DS, L1H, L2H, N>(
       p, __ESIMD_NS::simd<Toffset, N>(offset), vals, pred);
 }
 
@@ -1510,7 +1510,7 @@ lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
 /// @tparam NElts is the number of elements to store per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
@@ -1520,7 +1520,7 @@ lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
 ///
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
     AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
@@ -1529,17 +1529,17 @@ lsc_scatter(AccessorTy acc,
             __ESIMD_NS::simd<T, N * NElts> vals,
             __ESIMD_NS::simd_mask<N> pred = 1) {
 #ifdef __ESIMD_FORCE_STATELESS_MEM
-  lsc_scatter<T, NElts, DS, L1H, L3H>(__ESIMD_DNS::accessorToPointer<T>(acc),
+  lsc_scatter<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
                                       offsets, vals, pred);
 #else
-  __ESIMD_DNS::scatter_impl<T, NElts, DS, L1H, L3H>(acc, offsets, vals, pred);
+  __ESIMD_DNS::scatter_impl<T, NElts, DS, L1H, L2H>(acc, offsets, vals, pred);
 #endif
 }
 
 #ifdef __ESIMD_FORCE_STATELESS_MEM
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy, typename Toffset>
 __ESIMD_API std::enable_if_t<
     __ESIMD_DNS::is_device_accessor_with_v<
@@ -1548,14 +1548,14 @@ __ESIMD_API std::enable_if_t<
 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
             __ESIMD_NS::simd<T, N * NElts> vals,
             __ESIMD_NS::simd_mask<N> pred = 1) {
-  lsc_scatter<T, NElts, DS, L1H, L3H, N, AccessorTy>(
+  lsc_scatter<T, NElts, DS, L1H, L2H, N, AccessorTy>(
       acc, convert<uint64_t>(offsets), vals, pred);
 }
 #endif
 
 template <typename T, int NElts = 1,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N, typename AccessorTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
     AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
@@ -1590,7 +1590,7 @@ lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 /// @tparam NElts is the number of elements to store per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param vals is values to store.
 /// @param pred is operation predicate. Zero means operation is skipped
@@ -1599,12 +1599,12 @@ lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 /// @param flags is the alignment specifier type tag.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals,
                 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
-  return __ESIMD_DNS::block_store_impl<T, NElts, L1H, L3H>(p, vals, pred,
+  return __ESIMD_DNS::block_store_impl<T, NElts, L1H, L2H>(p, vals, pred,
                                                            flags);
 }
 
@@ -1631,17 +1631,17 @@ lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals,
 /// @tparam NElts is the number of elements to store per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param vals is values to store.
 /// @param flags is the alignment specifier type tag.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
-  lsc_block_store<T, NElts, DS, L1H, L3H>(p, vals, __ESIMD_NS::simd_mask<1>(1),
+  lsc_block_store<T, NElts, DS, L1H, L2H>(p, vals, __ESIMD_NS::simd_mask<1>(1),
                                           flags);
 }
 
@@ -1669,7 +1669,7 @@ lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
 /// @tparam NElts is the number of elements to store per address.
 /// @tparam DS is the data size (unused/obsolete).
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offset is the zero-based offset in bytes.
@@ -1680,7 +1680,7 @@ lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
 /// @param flags is the alignment specifier type tag.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1690,12 +1690,12 @@ __ESIMD_API std::enable_if_t<
 lsc_block_store(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset,
                 __ESIMD_NS::simd<T, NElts> vals,
                 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
-  __ESIMD_DNS::block_store_impl<T, NElts, L1H, L3H>(acc, offset, vals, pred,
+  __ESIMD_DNS::block_store_impl<T, NElts, L1H, L2H>(acc, offset, vals, pred,
                                                     flags);
 }
 
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1731,7 +1731,7 @@ lsc_block_store(AccessorTy acc, uint32_t offset,
 /// @tparam NElts is the number of elements to store per address.
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offset is the zero-based offset in bytes.
@@ -1739,7 +1739,7 @@ lsc_block_store(AccessorTy acc, uint32_t offset,
 /// @param flags is the alignment specifier type tag.
 ///
 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy,
           typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
 __ESIMD_API std::enable_if_t<
@@ -1748,7 +1748,7 @@ __ESIMD_API std::enable_if_t<
     __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
 lsc_block_store(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset,
                 __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
-  lsc_block_store<T, NElts, DS, L1H, L3H>(acc, offset, vals,
+  lsc_block_store<T, NElts, DS, L1H, L2H>(acc, offset, vals,
                                           __ESIMD_NS::simd_mask<1>(1), flags);
 }
 
@@ -1838,7 +1838,7 @@ constexpr void check_lsc_block_2d_restrictions() {
 /// @tparam Transposed is the transposed version or not.
 /// @tparam Transformed is apply VNNI transform or not.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the data size
 /// @param Ptr is the surface base address for this operation.
 /// @param SurfaceWidth is the surface width minus 1 in bytes
@@ -1856,14 +1856,14 @@ constexpr void check_lsc_block_2d_restrictions() {
 ///
 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
           bool Transposed = false, bool Transformed = false,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N = detail::get_lsc_block_2d_data_size<
               T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
 __ESIMD_API __ESIMD_NS::simd<T, N>
 lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
             unsigned SurfacePitch, int X, int Y) {
   using RawT = __ESIMD_DNS::__raw_t<T>;
-  detail::check_lsc_cache_hint<detail::lsc_action::load, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::load, L1H, L2H>();
   detail::check_lsc_block_2d_restrictions<RawT, BlockWidth, BlockHeight,
                                           NBlocks, Transposed, Transformed,
                                           detail::block_2d_op::load>();
@@ -1900,7 +1900,7 @@ lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
       Transposed ? detail::lsc_data_order::transpose
                  : detail::lsc_data_order::nontranspose;
   __ESIMD_NS::simd<RawT, ActualN> Raw =
-      __esimd_lsc_load2d_stateless<RawT, L1H, L3H, DS, _Transposed, NBlocks,
+      __esimd_lsc_load2d_stateless<RawT, L1H, L2H, DS, _Transposed, NBlocks,
                                    BlockWidth, BlockHeight, Transformed,
                                    ActualN>(pred.data(), surf_addr,
                                             SurfaceWidth, SurfaceHeight,
@@ -1954,7 +1954,7 @@ lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
 /// @tparam BlockHeight is the block height in number of elements.
 /// @tparam NBlocks is the number of blocks.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the data size
 /// @param Ptr is the surface base address for this operation.
 /// @param SurfaceWidth is the surface width minus 1 in bytes
@@ -1966,13 +1966,13 @@ lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
 /// rows.
 ///
 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N = detail::get_lsc_block_2d_data_size<
               T, NBlocks, BlockHeight, BlockWidth, false, false>()>
 __ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth,
                                  unsigned SurfaceHeight, unsigned SurfacePitch,
                                  int X, int Y) {
-  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
   detail::check_lsc_block_2d_restrictions<T, BlockWidth, BlockHeight, NBlocks,
                                           false, false,
                                           detail::block_2d_op::prefetch>();
@@ -1982,7 +1982,7 @@ __ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth,
   uintptr_t surf_addr = reinterpret_cast<uintptr_t>(Ptr);
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  __esimd_lsc_prefetch2d_stateless<T, L1H, L3H, DS, _Transposed, NBlocks,
+  __esimd_lsc_prefetch2d_stateless<T, L1H, L2H, DS, _Transposed, NBlocks,
                                    BlockWidth, BlockHeight, false, N>(
       pred.data(), surf_addr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
 }
@@ -1997,7 +1997,7 @@ __ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth,
 /// @tparam BlockWidth is the block width in number of elements.
 /// @tparam BlockHeight is the block height in number of elements.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the data size
 /// @param Ptr is the surface base address for this operation.
 /// @param SurfaceWidth is the surface width minus 1 in bytes
@@ -2012,14 +2012,14 @@ __ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth,
 ///   getNextPowerOf2(BlockWidth) * NBlocks
 ///
 template <typename T, int BlockWidth, int BlockHeight = 1,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N = detail::get_lsc_block_2d_data_size<
               T, 1u, BlockHeight, BlockWidth, false, false>()>
 __ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth,
                               unsigned SurfaceHeight, unsigned SurfacePitch,
                               int X, int Y, __ESIMD_NS::simd<T, N> Vals) {
   using RawT = __ESIMD_DNS::__raw_t<T>;
-  detail::check_lsc_cache_hint<detail::lsc_action::store, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::store, L1H, L2H>();
   detail::check_lsc_block_2d_restrictions<RawT, BlockWidth, BlockHeight, 1,
                                           false, false,
                                           detail::block_2d_op::store>();
@@ -2043,7 +2043,7 @@ __ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth,
   }
 
   __ESIMD_NS::simd_mask<BlockHeight * Pitch> pred = 1;
-  __esimd_lsc_store2d_stateless<RawT, L1H, L3H, DS, _Transposed, 1u, BlockWidth,
+  __esimd_lsc_store2d_stateless<RawT, L1H, L2H, DS, _Transposed, 1u, BlockWidth,
                                 BlockHeight, false, BlockHeight * Pitch>(
       pred.data(), surf_addr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y,
       Raw.data());
@@ -2236,19 +2236,19 @@ class config_2d_mem_access {
   __ESIMD_NS::simd<uint32_t, 16> payload_data;
 
   template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
-            bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L3H,
+            bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
             int N>
   friend ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T1, N> lsc_load_2d(
       config_2d_mem_access<T1, BlockWidth1, BlockHeight1, NBlocks1> &payload);
 
   template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
-            cache_hint L1H, cache_hint L3H, int N>
+            cache_hint L1H, cache_hint L2H, int N>
   friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(
       config_2d_mem_access<T1, BlockWidth1, BlockHeight1, NBlocks1> &payload,
       __ESIMD_NS::simd<T1, N> Data);
 
   template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
-            bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L3H,
+            bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
             int N>
   friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
       config_2d_mem_access<T1, BlockWidth1, BlockHeight1, NBlocks1> &payload);
@@ -2266,7 +2266,7 @@ class config_2d_mem_access {
 /// @tparam Transposed is the transposed version or not.
 /// @tparam Transformed is apply VNNI transform or not.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the data size
 /// @param payload is \c config_2d_mem_access \c object holding all the data
 /// @return is a vector of type T and size N, where N is
@@ -2275,7 +2275,7 @@ class config_2d_mem_access {
 ///
 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
           bool Transposed = false, bool Transformed = false,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N = detail::get_lsc_block_2d_data_size<
               T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
 ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
@@ -2283,7 +2283,7 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
   detail::check_lsc_block_2d_restrictions<T, BlockWidth, BlockHeight, NBlocks,
                                           Transposed, Transformed,
                                           detail::block_2d_op::load>();
-  detail::check_lsc_cache_hint<detail::lsc_action::load, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::load, L1H, L2H>();
   constexpr int ElemsPerDword = 4 / sizeof(T);
   constexpr int GRFRowSize = Transposed    ? BlockHeight
                              : Transformed ? BlockWidth * ElemsPerDword
@@ -2311,7 +2311,7 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
 
   static_assert(N == ActualN || N == DstElements, "Incorrect element count");
 
-  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L3H>()
+  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
                                   << 17;
   constexpr uint32_t base_desc = 0x2000003;
   constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
@@ -2363,24 +2363,24 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
 /// @tparam Transposed is the transposed version or not.
 /// @tparam Transformed is apply VNNI transform or not.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the data size
 /// @param payload is \c config_2d_mem_access \c object holding all the data
 ///
 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
           bool Transposed = false, bool Transformed = false,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N = detail::get_lsc_block_2d_data_size<
               T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
 ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
     config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload) {
-  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
   detail::check_lsc_block_2d_restrictions<T, BlockWidth, BlockHeight, NBlocks,
                                           Transposed, Transformed,
                                           detail::block_2d_op::prefetch>();
   static_assert(!Transposed || !Transformed,
                 "Transposed and transformed is not supported");
-  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L3H>()
+  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
                                   << 17;
   constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
   constexpr uint32_t base_desc = 0x2000003;
@@ -2406,13 +2406,13 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
 /// @tparam BlockHeight block height in number of elements
 /// @tparam NBlocks Number of blocks
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam N is the data size
 /// @param payload is \c config_2d_mem_access \c object holding all the data
 /// @param Data is the data to be stored.
 ///
 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           int N = detail::get_lsc_block_2d_data_size<
               T, NBlocks, BlockHeight, BlockWidth, false, false>()>
 ESIMD_INLINE SYCL_ESIMD_FUNCTION void
@@ -2421,9 +2421,9 @@ lsc_store_2d(config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload,
   detail::check_lsc_block_2d_restrictions<T, BlockWidth, BlockHeight, NBlocks,
                                           false, false,
                                           detail::block_2d_op::store>();
-  detail::check_lsc_cache_hint<detail::lsc_action::store, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::store, L1H, L2H>();
 
-  constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L3H>()
+  constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
                                   << 17;
   constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
   constexpr uint32_t base_desc = 0x2000007;
@@ -2532,32 +2532,32 @@ lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param offsets is the zero-based offsets.
 /// @param pred is predicates.
 ///
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Toffset>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
                   __ESIMD_NS::simd_mask<N> pred) {
-  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L3H, Toffset>(
+  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L2H, Toffset>(
       p, offsets, pred);
 }
 
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Toffset>
 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
                                  __ESIMD_DNS::get_num_args<Op>() == 0,
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_atomic_update<Op, T, N, DS, L1H, L3H>(
+  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
       p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
 }
 
@@ -2570,7 +2570,7 @@ lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param offsets is the zero-based offsets.
 /// @param src0 is the first atomic operand.
@@ -2578,32 +2578,32 @@ lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
 ///
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Toffset>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
                   __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
-  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L3H, Toffset>(
+  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L2H, Toffset>(
       p, offsets, src0, pred);
 }
 
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename OffsetObjT, typename RegionTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
                   __ESIMD_NS::simd<T, N> src0,
                   __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_atomic_update<Op, T, N, DS, L1H, L3H>(p, offsets.read(), src0,
+  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
                                                    pred);
 }
 
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Toffset>
 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
                                  __ESIMD_DNS::get_num_args<Op>() == 1 &&
@@ -2613,7 +2613,7 @@ __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
                   __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_atomic_update<Op, T, N, DS, L1H, L3H>(
+  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
       p, __ESIMD_NS::simd<Toffset, N>(offset), src0, pred);
 }
 
@@ -2626,7 +2626,7 @@ lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @param p is the base pointer.
 /// @param offsets is the zero-based offsets.
 /// @param src0 is the first atomic operand (expected value).
@@ -2635,33 +2635,33 @@ lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
 ///
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Toffset>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
                   __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
                   __ESIMD_NS::simd_mask<N> pred) {
-  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L3H, Toffset>(
+  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L2H, Toffset>(
       p, offsets, src0, src1, pred);
 }
 
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename OffsetObjT, typename RegionTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
                   __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
                   __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_atomic_update<Op, T, N, DS, L1H, L3H>(p, offsets.read(), src0,
+  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
                                                    src1, pred);
 }
 
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Toffset>
 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
                                  __ESIMD_DNS::get_num_args<Op>() == 2,
@@ -2669,7 +2669,7 @@ __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
                   __ESIMD_NS::simd<T, N> src1,
                   __ESIMD_NS::simd_mask<N> pred = 1) {
-  return lsc_atomic_update<Op, T, N, DS, L1H, L3H>(
+  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
       p, __ESIMD_NS::simd<Toffset, N>(offset), src0, src1, pred);
 }
 
@@ -2682,7 +2682,7 @@ lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offsets is the zero-based offsets.
@@ -2692,7 +2692,7 @@ lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
 ///   update.
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy, typename Toffset>
 __ESIMD_API std::enable_if_t<
     __ESIMD_DNS::is_device_accessor_with_v<
@@ -2703,7 +2703,7 @@ __ESIMD_API std::enable_if_t<
     __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
                   __ESIMD_NS::simd_mask<N> pred) {
-  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L3H>(acc, offsets,
+  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L2H>(acc, offsets,
                                                                  pred);
 }
 
@@ -2714,7 +2714,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offsets is the zero-based offsets.
@@ -2724,7 +2724,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
 ///   update.
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
                              __ESIMD_NS::simd<T, N>>
@@ -2743,7 +2743,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offsets is the zero-based offsets.
@@ -2754,13 +2754,13 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 ///   update.
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy, typename Toffset>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
                              __ESIMD_NS::simd<T, N>>
 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
                   __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
-  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L3H>(acc, offsets,
+  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, L1H, L2H>(acc, offsets,
                                                                  src0, pred);
 }
 
@@ -2771,7 +2771,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offsets is the zero-based offsets.
@@ -2782,7 +2782,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
 ///   update.
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
                              __ESIMD_NS::simd<T, N>>
@@ -2801,7 +2801,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offsets is the zero-based offsets.
@@ -2813,7 +2813,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
 ///   update.
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy, typename Toffset>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
                              __ESIMD_NS::simd<T, N>>
@@ -2821,7 +2821,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
                   __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
                   __ESIMD_NS::simd_mask<N> pred) {
 #ifdef __ESIMD_FORCE_STATELESS_MEM
-  return lsc_atomic_update<Op, T, N, DS, L1H, L3H>(
+  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
       __ESIMD_DNS::accessorToPointer<T>(acc), offsets, src0, src1, pred);
 #else
   static_assert(std::is_integral_v<Toffset> && sizeof(Toffset) == 4,
@@ -2829,7 +2829,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size<T, DS>();
   __ESIMD_DNS::check_atomic<Op, T, N, 2>();
-  detail::check_lsc_cache_hint<detail::lsc_action::atomic, L1H, L3H>();
+  detail::check_lsc_cache_hint<detail::lsc_action::atomic, L1H, L2H>();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
   constexpr lsc_data_size _DS =
@@ -2843,7 +2843,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
   __ESIMD_NS::simd<MsgT, N> Msg_data1 = detail::lsc_format_input<MsgT>(src1);
   auto si = __ESIMD_NS::get_surface_index(acc);
   __ESIMD_NS::simd<MsgT, N> Tmp =
-      __esimd_lsc_xatomic_bti_2<MsgT, IOp, L1H, L3H, _AddressScale, _ImmOffset,
+      __esimd_lsc_xatomic_bti_2<MsgT, IOp, L1H, L2H, _AddressScale, _ImmOffset,
                                 _DS, _VS, _Transposed, N>(
           pred.data(), offsets.data(), Msg_data0.data(), Msg_data1.data(), si);
   return detail::lsc_format_ret<T>(Tmp);
@@ -2857,7 +2857,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
 /// @tparam N is the number of channels (platform dependent).
 /// @tparam DS is the data size.
 /// @tparam L1H is L1 cache hint.
-/// @tparam L3H is L3 cache hint.
+/// @tparam L2H is L2 cache hint.
 /// @tparam AccessorTy is the \ref sycl::accessor type.
 /// @param acc is the SYCL accessor.
 /// @param offsets is the zero-based offsets.
@@ -2869,7 +2869,7 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
 ///   update.
 template <__ESIMD_NS::atomic_op Op, typename T, int N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename AccessorTy>
 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
                              __ESIMD_NS::simd<T, N>>
diff --git a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_load_prefetch_2d.hpp b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_load_prefetch_2d.hpp
index 5857bce003f89..42c437700c0c1 100644
--- a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_load_prefetch_2d.hpp
+++ b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_load_prefetch_2d.hpp
@@ -20,7 +20,7 @@ using namespace sycl::ext::intel::experimental::esimd;
 template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
           int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
           bool Transposed = false, bool Transformed = false,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           bool use_prefetch = false>
 bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
           int X, int Y) {
@@ -90,14 +90,14 @@ bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
 
             simd<T, N> vals;
             if constexpr (use_prefetch) {
-              lsc_prefetch_2d<T, BlockWidth, BlockHeight, NBlocks, L1H, L3H>(
+              lsc_prefetch_2d<T, BlockWidth, BlockHeight, NBlocks, L1H, L2H>(
                   in + off, width, height, pitch, X, Y);
               vals = lsc_load_2d<T, BlockWidth, BlockHeight, NBlocks,
                                  Transposed, Transformed>(in + off, width,
                                                           height, pitch, X, Y);
             } else {
               vals = lsc_load_2d<T, BlockWidth, BlockHeight, NBlocks,
-                                 Transposed, Transformed, L1H, L3H>(
+                                 Transposed, Transformed, L1H, L2H>(
                   in + off, width, height, pitch, X, Y);
             }
 
diff --git a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_store_2d.hpp b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_store_2d.hpp
index 3264a16ffc2df..ee553fd12c33b 100644
--- a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_store_2d.hpp
+++ b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_store_2d.hpp
@@ -53,7 +53,7 @@ template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
           int BlockWidth, int BlockHeight = 1,
           int N = get_lsc_block_2d_data_size<T, 1u, BlockHeight, BlockWidth,
                                              false, false>(),
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
 bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
           int X, int Y) {
 
@@ -89,7 +89,7 @@ bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
 
             simd<T, N> vals(new_val + off, 1);
             // IUT
-            lsc_store_2d<T, BlockWidth, BlockHeight, L1H, L3H>(
+            lsc_store_2d<T, BlockWidth, BlockHeight, L1H, L2H>(
                 out + off, SurfaceWidth * sizeof(T) - 1, SurfaceHeight - 1,
                 SurfacePitch * sizeof(T) - 1, X, Y, vals);
           });
diff --git a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_load.hpp b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_load.hpp
index 97ad1a68dfadd..54359e458793a 100644
--- a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_load.hpp
+++ b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_load.hpp
@@ -20,7 +20,7 @@ using namespace sycl::ext::intel::experimental::esimd;
 template <
     int case_num, typename T, uint32_t Groups, uint32_t Threads, uint16_t VL,
     uint16_t VS, bool transpose, lsc_data_size DS = lsc_data_size::default_size,
-    cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+    cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
     bool use_prefetch = false, typename Flags = __ESIMD_NS::overaligned_tag<4>>
 bool test(uint32_t pmask = 0xffffffff) {
   static_assert((VL == 1) || !transpose, "Transpose must have exec size 1");
@@ -87,19 +87,19 @@ bool test(uint32_t pmask = 0xffffffff) {
             if constexpr (transpose) {
               simd<T, VS> vals;
               if constexpr (use_prefetch) {
-                lsc_prefetch<T, VS, DS, L1H, L3H>(acci, byte_off);
+                lsc_prefetch<T, VS, DS, L1H, L2H>(acci, byte_off);
                 if constexpr (sizeof(T) < 8) {
-                  vals = lsc_block_load<T, VS, DS, L1H, L3H>(acci, byte_off,
+                  vals = lsc_block_load<T, VS, DS, L1H, L2H>(acci, byte_off,
                                                              Flags{});
                 } else {
-                  vals = lsc_block_load<T, VS, DS, L1H, L3H>(acci, byte_off);
+                  vals = lsc_block_load<T, VS, DS, L1H, L2H>(acci, byte_off);
                 }
               } else {
                 if constexpr (sizeof(T) < 8) {
-                  vals = lsc_block_load<T, VS, DS, L1H, L3H>(acci, byte_off,
+                  vals = lsc_block_load<T, VS, DS, L1H, L2H>(acci, byte_off,
                                                              Flags{});
                 } else {
-                  vals = lsc_block_load<T, VS, DS, L1H, L3H>(acci, byte_off);
+                  vals = lsc_block_load<T, VS, DS, L1H, L2H>(acci, byte_off);
                 }
               }
               if constexpr (sizeof(T) < 8) {
@@ -115,11 +115,11 @@ bool test(uint32_t pmask = 0xffffffff) {
 
               simd<T, VS * VL> vals;
               if constexpr (use_prefetch) {
-                lsc_prefetch<T, VS, DS, L1H, L3H, VL>(acci, offset, pred);
+                lsc_prefetch<T, VS, DS, L1H, L2H, VL>(acci, offset, pred);
                 vals = lsc_gather<T, VS, DS, cache_hint::none, cache_hint::none,
                                   VL>(acci, offset, pred);
               } else {
-                vals = lsc_gather<T, VS, DS, L1H, L3H, VL>(acci, offset, pred);
+                vals = lsc_gather<T, VS, DS, L1H, L2H, VL>(acci, offset, pred);
               }
 
               if constexpr (DS == lsc_data_size::u8u32 ||
diff --git a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_store.hpp b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_store.hpp
index 0331db4ba8a73..ea118012f07f5 100644
--- a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_store.hpp
+++ b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_surf_store.hpp
@@ -20,7 +20,7 @@ using namespace sycl::ext::intel::experimental::esimd;
 template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
           uint16_t VL, uint16_t VS, bool transpose,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Flags = __ESIMD_NS::overaligned_tag<4>>
 bool test(uint32_t pmask = 0xffffffff) {
   static_assert((VL == 1) || !transpose, "Transpose must have exec size 1");
@@ -82,10 +82,10 @@ bool test(uint32_t pmask = 0xffffffff) {
             if constexpr (transpose) {
               simd<T, VS> vals(new_val + elem_off, 1);
               if constexpr (sizeof(T) < 8) {
-                lsc_block_store<T, VS, DS, L1H, L3H>(acco, byte_off, vals,
+                lsc_block_store<T, VS, DS, L1H, L2H>(acco, byte_off, vals,
                                                      Flags{});
               } else {
-                lsc_block_store<T, VS, DS, L1H, L3H>(acco, byte_off, vals);
+                lsc_block_store<T, VS, DS, L1H, L2H>(acco, byte_off, vals);
               }
             } else {
               simd<uint32_t, VL> offset(byte_off, VS * sizeof(T));
@@ -99,7 +99,7 @@ bool test(uint32_t pmask = 0xffffffff) {
                 for (int j = 0; j < VS; j++)
                   vals.template select<1, 1>(i + j * VL) = val++;
 
-              lsc_scatter<T, VS, DS, L1H, L3H, VL>(acco, offset, vals, pred);
+              lsc_scatter<T, VS, DS, L1H, L2H, VL>(acco, offset, vals, pred);
             }
           });
     });
diff --git a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_block_load_prefetch.hpp b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_block_load_prefetch.hpp
index bab1dc2991894..f82e0d0dc52c5 100644
--- a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_block_load_prefetch.hpp
+++ b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_block_load_prefetch.hpp
@@ -20,7 +20,7 @@ using namespace sycl::ext::intel::experimental::esimd;
 
 template <typename T, uint16_t N,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           bool UsePrefetch = false, bool UseOldValuesOperand = true,
           typename Flags = __ESIMD_NS::overaligned_tag<4>>
 bool test(queue Q, uint32_t Groups, uint32_t Threads) {
@@ -63,7 +63,7 @@ bool test(queue Q, uint32_t Groups, uint32_t Threads) {
 
          simd_mask<1> Mask = GlobalID % 1;
          if constexpr (UsePrefetch) {
-           lsc_prefetch<T, N, DS, L1H, L3H>(In + ElemOffset);
+           lsc_prefetch<T, N, DS, L1H, L2H>(In + ElemOffset);
            if constexpr (sizeof(T) < 8) {
              Vals = lsc_block_load<T, N, DS>(In + ElemOffset, Mask, OldValues,
                                              Flags{});
@@ -72,16 +72,16 @@ bool test(queue Q, uint32_t Groups, uint32_t Threads) {
            }
          } else {
            if constexpr (sizeof(T) < 8) {
-             Vals = lsc_block_load<T, N, DS, L1H, L3H>(In + ElemOffset, Mask,
+             Vals = lsc_block_load<T, N, DS, L1H, L2H>(In + ElemOffset, Mask,
                                                        OldValues, Flags{});
            } else {
-             Vals = lsc_block_load<T, N, DS, L1H, L3H>(In + ElemOffset, Mask,
+             Vals = lsc_block_load<T, N, DS, L1H, L2H>(In + ElemOffset, Mask,
                                                        OldValues);
            }
          }
        } else {
          if constexpr (UsePrefetch) {
-           lsc_prefetch<T, N, DS, L1H, L3H>(In + ElemOffset);
+           lsc_prefetch<T, N, DS, L1H, L2H>(In + ElemOffset);
            if constexpr (sizeof(T) < 8) {
              Vals = lsc_block_load<T, N, DS>(In + ElemOffset, Flags{});
            } else {
@@ -90,9 +90,9 @@ bool test(queue Q, uint32_t Groups, uint32_t Threads) {
          } else {
            if constexpr (sizeof(T) < 8) {
              Vals =
-                 lsc_block_load<T, N, DS, L1H, L3H>(In + ElemOffset, Flags{});
+                 lsc_block_load<T, N, DS, L1H, L2H>(In + ElemOffset, Flags{});
            } else {
-             Vals = lsc_block_load<T, N, DS, L1H, L3H>(In + ElemOffset);
+             Vals = lsc_block_load<T, N, DS, L1H, L2H>(In + ElemOffset);
            }
          }
        }
@@ -138,7 +138,7 @@ bool test(queue Q, uint32_t Groups, uint32_t Threads) {
 template <typename T> bool test_lsc_block_load() {
   constexpr lsc_data_size DS = lsc_data_size::default_size;
   constexpr cache_hint L1H = cache_hint::none;
-  constexpr cache_hint L3H = cache_hint::none;
+  constexpr cache_hint L2H = cache_hint::none;
 
   constexpr bool NoPrefetch = false;
   constexpr bool CheckMerge = true;
@@ -150,52 +150,52 @@ template <typename T> bool test_lsc_block_load() {
             << Q.get_device().get_info<sycl::info::device::name>() << std::endl;
 
   bool Passed = true;
-  Passed &= test<T, 64, DS, L1H, L3H, NoPrefetch, NoCheckMerge>(Q, 1, 4);
-  Passed &= test<T, 32, DS, L1H, L3H, NoPrefetch, NoCheckMerge>(Q, 1, 4);
-  Passed &= test<T, 16, DS, L1H, L3H, NoPrefetch, NoCheckMerge>(Q, 2, 2);
-  Passed &= test<T, 8, DS, L1H, L3H, NoPrefetch, NoCheckMerge>(Q, 2, 8);
-  Passed &= test<T, 4, DS, L1H, L3H, NoPrefetch, NoCheckMerge>(Q, 3, 3);
+  Passed &= test<T, 64, DS, L1H, L2H, NoPrefetch, NoCheckMerge>(Q, 1, 4);
+  Passed &= test<T, 32, DS, L1H, L2H, NoPrefetch, NoCheckMerge>(Q, 1, 4);
+  Passed &= test<T, 16, DS, L1H, L2H, NoPrefetch, NoCheckMerge>(Q, 2, 2);
+  Passed &= test<T, 8, DS, L1H, L2H, NoPrefetch, NoCheckMerge>(Q, 2, 8);
+  Passed &= test<T, 4, DS, L1H, L2H, NoPrefetch, NoCheckMerge>(Q, 3, 3);
   if constexpr (sizeof(T) * 2 >= sizeof(int))
-    Passed &= test<T, 2, DS, L1H, L3H, NoPrefetch, NoCheckMerge>(Q, 5, 5);
+    Passed &= test<T, 2, DS, L1H, L2H, NoPrefetch, NoCheckMerge>(Q, 5, 5);
   if constexpr (sizeof(T) >= sizeof(int))
-    Passed &= test<T, 1, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 3, 5);
+    Passed &= test<T, 1, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 3, 5);
   if constexpr (sizeof(T) <= 4) {
-    Passed &= test<T, 128, DS, L1H, L3H, NoPrefetch, CheckMerge,
+    Passed &= test<T, 128, DS, L1H, L2H, NoPrefetch, CheckMerge,
                    __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
-    Passed &= test<T, 128, DS, L1H, L3H, NoPrefetch, NoCheckMerge,
+    Passed &= test<T, 128, DS, L1H, L2H, NoPrefetch, NoCheckMerge,
                    __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
     if constexpr (sizeof(T) == 2) {
-      Passed &= test<T, 256, DS, L1H, L3H, NoPrefetch, CheckMerge,
+      Passed &= test<T, 256, DS, L1H, L2H, NoPrefetch, CheckMerge,
                      __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
-      Passed &= test<T, 256, DS, L1H, L3H, NoPrefetch, NoCheckMerge,
+      Passed &= test<T, 256, DS, L1H, L2H, NoPrefetch, NoCheckMerge,
                      __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
     }
     if constexpr (sizeof(T) == 1) {
-      Passed &= test<T, 512, DS, L1H, L3H, NoPrefetch, CheckMerge,
+      Passed &= test<T, 512, DS, L1H, L2H, NoPrefetch, CheckMerge,
                      __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
-      Passed &= test<T, 512, DS, L1H, L3H, NoPrefetch, NoCheckMerge,
+      Passed &= test<T, 512, DS, L1H, L2H, NoPrefetch, NoCheckMerge,
                      __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
     }
   }
 
-  Passed &= test<T, 64, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 1, 4);
-  Passed &= test<T, 32, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 2, 2);
-  Passed &= test<T, 16, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 4, 4);
-  Passed &= test<T, 8, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 2, 8);
-  Passed &= test<T, 4, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 3, 3);
+  Passed &= test<T, 64, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 1, 4);
+  Passed &= test<T, 32, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 2, 2);
+  Passed &= test<T, 16, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 4, 4);
+  Passed &= test<T, 8, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 2, 8);
+  Passed &= test<T, 4, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 3, 3);
   if constexpr (sizeof(T) * 2 >= sizeof(int))
-    Passed &= test<T, 2, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 5, 5);
+    Passed &= test<T, 2, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 5, 5);
   if constexpr (sizeof(T) >= sizeof(int))
-    Passed &= test<T, 1, DS, L1H, L3H, NoPrefetch, CheckMerge>(Q, 3, 5);
+    Passed &= test<T, 1, DS, L1H, L2H, NoPrefetch, CheckMerge>(Q, 3, 5);
   // Only 512-bits maximum can be loaded at once (i.e. 4*128 bytes).
   if constexpr (sizeof(T) <= 4)
-    Passed &= test<T, 128, DS, L1H, L3H, NoPrefetch, CheckMerge,
+    Passed &= test<T, 128, DS, L1H, L2H, NoPrefetch, CheckMerge,
                    __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
   if constexpr (sizeof(T) <= 2)
-    Passed &= test<T, 256, DS, L1H, L3H, NoPrefetch, CheckMerge,
+    Passed &= test<T, 256, DS, L1H, L2H, NoPrefetch, CheckMerge,
                    __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
   if constexpr (sizeof(T) == 1)
-    Passed &= test<T, 512, DS, L1H, L3H, NoPrefetch, CheckMerge,
+    Passed &= test<T, 512, DS, L1H, L2H, NoPrefetch, CheckMerge,
                    __ESIMD_NS::overaligned_tag<8>>(Q, 1, 4);
 
   return Passed;
@@ -205,7 +205,7 @@ template <typename T, lsc_data_size DS = lsc_data_size::default_size,
           bool IsGatherLikePrefetch = false>
 std::enable_if_t<!IsGatherLikePrefetch, bool> test_lsc_prefetch() {
   constexpr cache_hint L1H = cache_hint::cached;
-  constexpr cache_hint L3H = cache_hint::uncached;
+  constexpr cache_hint L2H = cache_hint::uncached;
   constexpr bool DoPrefetch = true;
 
   auto Q = queue{gpu_selector_v};
@@ -214,15 +214,15 @@ std::enable_if_t<!IsGatherLikePrefetch, bool> test_lsc_prefetch() {
             << Q.get_device().get_info<sycl::info::device::name>() << std::endl;
 
   bool Passed = true;
-  Passed &= test<T, 64, DS, L1H, L3H, DoPrefetch>(Q, 1, 4);
-  Passed &= test<T, 32, DS, L1H, L3H, DoPrefetch>(Q, 1, 4);
-  Passed &= test<T, 16, DS, L1H, L3H, DoPrefetch>(Q, 2, 2);
-  Passed &= test<T, 8, DS, L1H, L3H, DoPrefetch>(Q, 2, 8);
-  Passed &= test<T, 4, DS, L1H, L3H, DoPrefetch>(Q, 3, 3);
+  Passed &= test<T, 64, DS, L1H, L2H, DoPrefetch>(Q, 1, 4);
+  Passed &= test<T, 32, DS, L1H, L2H, DoPrefetch>(Q, 1, 4);
+  Passed &= test<T, 16, DS, L1H, L2H, DoPrefetch>(Q, 2, 2);
+  Passed &= test<T, 8, DS, L1H, L2H, DoPrefetch>(Q, 2, 8);
+  Passed &= test<T, 4, DS, L1H, L2H, DoPrefetch>(Q, 3, 3);
   if constexpr (sizeof(T) * 2 >= sizeof(int))
-    Passed &= test<T, 2, DS, L1H, L3H, DoPrefetch>(Q, 5, 5);
+    Passed &= test<T, 2, DS, L1H, L2H, DoPrefetch>(Q, 5, 5);
   if constexpr (sizeof(T) >= sizeof(int))
-    Passed &= test<T, 1, DS, L1H, L3H, DoPrefetch>(Q, 3, 5);
+    Passed &= test<T, 1, DS, L1H, L2H, DoPrefetch>(Q, 3, 5);
 
   return Passed;
 }
diff --git a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_gather_prefetch.hpp b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_gather_prefetch.hpp
index 21c1991cc7267..117b03d919f96 100644
--- a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_gather_prefetch.hpp
+++ b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_gather_prefetch.hpp
@@ -26,7 +26,7 @@ typedef uint32_t Toffset;
 
 template <typename T, uint16_t VL, uint16_t VS,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           bool use_prefetch = false, bool use_old_values = false>
 bool test(queue q, uint32_t Groups, uint32_t Threads,
           uint32_t pmask = 0xffffffff) {
@@ -90,15 +90,15 @@ bool test(queue q, uint32_t Groups, uint32_t Threads,
 
        simd<T, VS * VL> vals;
        if constexpr (use_prefetch) {
-         lsc_prefetch<T, VS, DS, L1H, L3H, VL>(in, offset, pred);
+         lsc_prefetch<T, VS, DS, L1H, L2H, VL>(in, offset, pred);
          vals = lsc_gather<T, VS, DS, cache_hint::none, cache_hint::none, VL>(
              in, offset, pred);
        } else if constexpr (!use_old_values) {
-         vals = lsc_gather<T, VS, DS, L1H, L3H, VL>(in, offset, pred);
+         vals = lsc_gather<T, VS, DS, L1H, L2H, VL>(in, offset, pred);
        } else { // use_old_values
          simd<T, VS *VL> old_values = merge_value;
          vals =
-             lsc_gather<T, VS, DS, L1H, L3H, VL>(in, offset, pred, old_values);
+             lsc_gather<T, VS, DS, L1H, L2H, VL>(in, offset, pred, old_values);
        }
 
        if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32)
@@ -158,38 +158,38 @@ template <typename T, lsc_data_size DS = lsc_data_size::default_size,
           bool DoPrefetch>
 bool test_lsc_gather_prefetch(queue q) {
   constexpr cache_hint L1H = cache_hint::cached;
-  constexpr cache_hint L3H = cache_hint::uncached;
+  constexpr cache_hint L2H = cache_hint::uncached;
   constexpr bool DoMerging = true;
 
   bool Passed = true;
   Passed &=
-      test<T, 1, 1, DS, L1H, L3H, DoPrefetch, !DoMerging>(q, 4, 4, rand());
+      test<T, 1, 1, DS, L1H, L2H, DoPrefetch, !DoMerging>(q, 4, 4, rand());
   if constexpr (!DoPrefetch)
     Passed &=
-        test<T, 1, 1, DS, L1H, L3H, DoPrefetch, DoMerging>(q, 4, 4, rand());
+        test<T, 1, 1, DS, L1H, L2H, DoPrefetch, DoMerging>(q, 4, 4, rand());
 
 #ifndef USE_SCALAR_OFFSET
   // These tests use lsc_scatter with scalar offset when USE_SCALAR_OFFSET macro
   // is set, which is UB and thus guarded by the macro here.
-  Passed &= test<T, 32, 1, DS, L1H, L3H, DoPrefetch>(q, 1, 4, rand());
-  Passed &= test<T, 16, 1, DS, L1H, L3H, DoPrefetch>(q, 2, 4, rand());
-  Passed &= test<T, 8, 1, DS, L1H, L3H, DoPrefetch>(q, 2, 2, rand());
-  Passed &= test<T, 4, 1, DS, L1H, L3H, DoPrefetch>(q, 4, 2, rand());
-  Passed &= test<T, 2, 1, DS, L1H, L3H, DoPrefetch>(q, 4, 16, rand());
+  Passed &= test<T, 32, 1, DS, L1H, L2H, DoPrefetch>(q, 1, 4, rand());
+  Passed &= test<T, 16, 1, DS, L1H, L2H, DoPrefetch>(q, 2, 4, rand());
+  Passed &= test<T, 8, 1, DS, L1H, L2H, DoPrefetch>(q, 2, 2, rand());
+  Passed &= test<T, 4, 1, DS, L1H, L2H, DoPrefetch>(q, 4, 2, rand());
+  Passed &= test<T, 2, 1, DS, L1H, L2H, DoPrefetch>(q, 4, 16, rand());
 
   // The next block of tests is only for gather with merging semantics,
   // not for prefetch tests.
   if constexpr (!DoPrefetch) {
     Passed &=
-        test<T, 32, 1, DS, L1H, L3H, DoPrefetch, DoMerging>(q, 1, 4, rand());
+        test<T, 32, 1, DS, L1H, L2H, DoPrefetch, DoMerging>(q, 1, 4, rand());
     Passed &=
-        test<T, 2, 1, DS, L1H, L3H, DoPrefetch, DoMerging>(q, 4, 16, rand());
+        test<T, 2, 1, DS, L1H, L2H, DoPrefetch, DoMerging>(q, 4, 16, rand());
   }
 
   if constexpr (((DS == lsc_data_size::default_size && sizeof(T) >= 4) ||
                  DS == lsc_data_size::u32 || DS == lsc_data_size::u32) &&
                 !DoPrefetch) {
-    Passed &= test<T, 32, 2, DS, L1H, L3H, DoPrefetch>(q, 2, 4, rand());
+    Passed &= test<T, 32, 2, DS, L1H, L2H, DoPrefetch>(q, 2, 4, rand());
   }
 #endif // !USE_SCALAR_OFFSET
 
diff --git a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_store.hpp b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_store.hpp
index 4765152b644c8..e7864d6401835 100644
--- a/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_store.hpp
+++ b/sycl/test-e2e/ESIMD/lsc/Inputs/lsc_usm_store.hpp
@@ -26,7 +26,7 @@ typedef uint32_t Toffset;
 template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
           uint16_t VL, uint16_t VS, bool transpose,
           lsc_data_size DS = lsc_data_size::default_size,
-          cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
+          cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none,
           typename Flags = __ESIMD_NS::overaligned_tag<4>>
 bool test(uint32_t pmask = 0xffffffff) {
   static_assert((VL == 1) || !transpose, "Transpose must have exec size 1");
@@ -86,10 +86,10 @@ bool test(uint32_t pmask = 0xffffffff) {
             if constexpr (transpose) {
               simd<T, VS> vals(new_val + elem_off, 1);
               if constexpr (sizeof(T) < 8) {
-                lsc_block_store<T, VS, DS, L1H, L3H>(out + elem_off, vals,
+                lsc_block_store<T, VS, DS, L1H, L2H>(out + elem_off, vals,
                                                      Flags{});
               } else {
-                lsc_block_store<T, VS, DS, L1H, L3H>(out + elem_off, vals);
+                lsc_block_store<T, VS, DS, L1H, L2H>(out + elem_off, vals);
               }
             } else {
               simd<Toffset, VL> offset(byte_off, VS * sizeof(T));
@@ -103,7 +103,7 @@ bool test(uint32_t pmask = 0xffffffff) {
                 for (int j = 0; j < VS; j++)
                   vals.template select<1, 1>(i + j * VL) = val++;
 
-              lsc_scatter<T, VS, DS, L1H, L3H, VL>(out, offset, vals, pred);
+              lsc_scatter<T, VS, DS, L1H, L2H, VL>(out, offset, vals, pred);
             }
           });
     });
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u16.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u16.cpp
index 65d5afe54c541..92b60cfe4a4bd 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u16.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u16.cpp
@@ -15,30 +15,30 @@ constexpr uint32_t seed = 322;
 using T = uint16_t;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 int main(void) {
   srand(seed);
   bool passed = true;
 
   // non transposed, non transformed
-  passed &= test<1, T, 1, 1, 16, 32, 1, false, false, L1H, L3H, true>(
+  passed &= test<1, T, 1, 1, 16, 32, 1, false, false, L1H, L2H, true>(
       24, 64, 64, 6, 21);
   passed &=
-      test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L3H, true>(16, 16, 32, 2, 5);
+      test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L2H, true>(16, 16, 32, 2, 5);
   passed &=
-      test<3, T, 1, 1, 8, 4, 2, false, false, L1H, L3H, true>(16, 7, 32, 4, 1);
+      test<3, T, 1, 1, 8, 4, 2, false, false, L1H, L2H, true>(16, 7, 32, 4, 1);
 
   // transformed
   passed &=
-      test<4, T, 1, 1, 8, 4, 4, false, true, L1H, L3H, true>(16, 10, 32, 6, 5);
-  passed &= test<5, T, 1, 1, 6, 10, 1, false, true, L1H, L3H, true>(18, 10, 32,
+      test<4, T, 1, 1, 8, 4, 4, false, true, L1H, L2H, true>(16, 10, 32, 6, 5);
+  passed &= test<5, T, 1, 1, 6, 10, 1, false, true, L1H, L2H, true>(18, 10, 32,
                                                                     12, 0);
   passed &=
-      test<6, T, 1, 1, 16, 2, 2, false, true, L1H, L3H, true>(32, 4, 32, 4, 1);
+      test<6, T, 1, 1, 16, 2, 2, false, true, L1H, L2H, true>(32, 4, 32, 4, 1);
   passed &=
-      test<7, T, 2, 2, 2, 16, 2, false, true, L1H, L3H, true>(4, 20, 32, 0, 3);
-  passed &= test<8, T, 1, 1, 16, 32, 1, false, true, L1H, L3H, true>(24, 50, 32,
+      test<7, T, 2, 2, 2, 16, 2, false, true, L1H, L2H, true>(4, 20, 32, 0, 3);
+  passed &= test<8, T, 1, 1, 16, 32, 1, false, true, L1H, L2H, true>(24, 50, 32,
                                                                      4, 14);
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u32.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u32.cpp
index 1c1b4717c8944..6dfcf035c652f 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u32.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u32.cpp
@@ -15,29 +15,29 @@ constexpr uint32_t seed = 322;
 using T = uint32_t;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 int main(void) {
   srand(seed);
   bool passed = true;
 
   // non transposed, non transformed
-  passed &= test<1, T, 1, 1, 16, 4, 1, false, false, L1H, L3H, true>(16, 16, 32,
+  passed &= test<1, T, 1, 1, 16, 4, 1, false, false, L1H, L2H, true>(16, 16, 32,
                                                                      2, 1);
   passed &=
-      test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L3H, true>(16, 16, 16, 1, 5);
+      test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L2H, true>(16, 16, 16, 1, 5);
   passed &=
-      test<3, T, 1, 1, 8, 2, 2, false, false, L1H, L3H, true>(16, 4, 16, 3, 1);
+      test<3, T, 1, 1, 8, 2, 2, false, false, L1H, L2H, true>(16, 4, 16, 3, 1);
 
   // transposed
   passed &=
-      test<4, T, 1, 1, 1, 16, 1, true, false, L1H, L3H, true>(16, 20, 16, 1, 2);
+      test<4, T, 1, 1, 1, 16, 1, true, false, L1H, L2H, true>(16, 20, 16, 1, 2);
   passed &=
-      test<5, T, 1, 1, 2, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 10, 1);
+      test<5, T, 1, 1, 2, 8, 1, true, false, L1H, L2H, true>(16, 10, 16, 10, 1);
   passed &=
-      test<6, T, 1, 1, 4, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 11, 1);
+      test<6, T, 1, 1, 4, 8, 1, true, false, L1H, L2H, true>(16, 10, 16, 11, 1);
   passed &=
-      test<7, T, 2, 2, 8, 2, 1, true, false, L1H, L3H, true>(16, 4, 16, 1, 1);
+      test<7, T, 2, 2, 8, 2, 1, true, false, L1H, L2H, true>(16, 4, 16, 1, 1);
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
   return passed ? 0 : 1;
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u64.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u64.cpp
index e691e1bb61e78..59bc2bd51497b 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u64.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u64.cpp
@@ -15,7 +15,7 @@ constexpr uint32_t seed = 322;
 using T = uint64_t;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 int main(void) {
   srand(seed);
@@ -23,21 +23,21 @@ int main(void) {
 
   // non transposed, non transformed
   passed &=
-      test<1, T, 1, 1, 8, 32, 1, false, false, L1H, L3H, true>(8, 32, 8, 0, 0);
+      test<1, T, 1, 1, 8, 32, 1, false, false, L1H, L2H, true>(8, 32, 8, 0, 0);
   passed &=
-      test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L3H, true>(16, 16, 16, 1, 5);
+      test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L2H, true>(16, 16, 16, 1, 5);
   passed &=
-      test<3, T, 1, 1, 4, 2, 1, false, false, L1H, L3H, true>(16, 4, 16, 3, 1);
+      test<3, T, 1, 1, 4, 2, 1, false, false, L1H, L2H, true>(16, 4, 16, 3, 1);
 
   // transposed
   passed &=
-      test<4, T, 1, 1, 1, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 1, 2);
+      test<4, T, 1, 1, 1, 8, 1, true, false, L1H, L2H, true>(16, 10, 16, 1, 2);
   passed &=
-      test<5, T, 1, 1, 2, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 10, 1);
+      test<5, T, 1, 1, 2, 8, 1, true, false, L1H, L2H, true>(16, 10, 16, 10, 1);
   passed &=
-      test<6, T, 1, 1, 4, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 11, 1);
+      test<6, T, 1, 1, 4, 8, 1, true, false, L1H, L2H, true>(16, 10, 16, 11, 1);
   passed &=
-      test<7, T, 2, 2, 4, 8, 1, true, false, L1H, L3H, true>(16, 9, 16, 1, 1);
+      test<7, T, 2, 2, 4, 8, 1, true, false, L1H, L2H, true>(16, 9, 16, 1, 1);
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
   return passed ? 0 : 1;
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u8.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u8.cpp
index 27e0361a9d0d2..cf703ffa84db7 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u8.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_prefetch_2d_u8.cpp
@@ -15,30 +15,30 @@ constexpr uint32_t seed = 322;
 using T = uint8_t;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 int main(void) {
   srand(seed);
   bool passed = true;
 
   // non transposed, non transformed
-  passed &= test<1, T, 1, 1, 16, 32, 2, false, false, L1H, L3H, true>(
+  passed &= test<1, T, 1, 1, 16, 32, 2, false, false, L1H, L2H, true>(
       40, 64, 64, 4, 21);
   passed &=
-      test<2, T, 2, 2, 8, 8, 2, false, false, L1H, L3H, true>(16, 16, 64, 8, 5);
-  passed &= test<3, T, 1, 1, 8, 32, 2, false, false, L1H, L3H, true>(16, 80, 64,
+      test<2, T, 2, 2, 8, 8, 2, false, false, L1H, L2H, true>(16, 16, 64, 8, 5);
+  passed &= test<3, T, 1, 1, 8, 32, 2, false, false, L1H, L2H, true>(16, 80, 64,
                                                                      4, 1);
 
   // transformed
-  passed &= test<4, T, 1, 1, 16, 4, 4, false, true, L1H, L3H, true>(100, 10,
+  passed &= test<4, T, 1, 1, 16, 4, 4, false, true, L1H, L2H, true>(100, 10,
                                                                     128, 16, 5);
-  passed &= test<5, T, 1, 1, 12, 20, 1, false, true, L1H, L3H, true>(16, 40, 64,
+  passed &= test<5, T, 1, 1, 12, 20, 1, false, true, L1H, L2H, true>(16, 40, 64,
                                                                      0, 0);
   passed &=
-      test<6, T, 1, 1, 16, 4, 2, false, true, L1H, L3H, true>(32, 4, 64, 4, 1);
+      test<6, T, 1, 1, 16, 4, 2, false, true, L1H, L2H, true>(32, 4, 64, 4, 1);
   passed &=
-      test<7, T, 2, 2, 4, 16, 2, false, true, L1H, L3H, true>(4, 20, 64, 0, 3);
-  passed &= test<8, T, 1, 1, 16, 32, 1, false, true, L1H, L3H, true>(24, 80, 64,
+      test<7, T, 2, 2, 4, 16, 2, false, true, L1H, L2H, true>(4, 20, 64, 0, 3);
+  passed &= test<8, T, 1, 1, 16, 32, 1, false, true, L1H, L2H, true>(24, 80, 64,
                                                                      4, 14);
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp
index e011932e8f596..15b71f20bc52f 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp
@@ -15,19 +15,19 @@ constexpr uint32_t seed = 197;
 constexpr lsc_data_size DS = lsc_data_size::u16u32;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 int main(void) {
   srand(seed);
   bool passed = true;
 
   // non-transpose
-  passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L3H, true>(rand());
+  passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L2H, true>(rand());
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
   return passed ? 0 : 1;
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u32.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u32.cpp
index 4b027d696727c..152f7f657adb6 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u32.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u32.cpp
@@ -15,26 +15,26 @@ constexpr uint32_t seed = 199;
 constexpr lsc_data_size DS = lsc_data_size::u32;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 template <int TestCastNum, typename T> bool tests() {
   bool passed = true;
   // non transpose
   passed &=
-      test<TestCastNum, T, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand());
+      test<TestCastNum, T, 1, 4, 32, 1, false, DS, L1H, L2H, true>(rand());
   passed &=
-      test<TestCastNum + 1, T, 1, 4, 32, 2, false, DS, L1H, L3H, true>(rand());
+      test<TestCastNum + 1, T, 1, 4, 32, 2, false, DS, L1H, L2H, true>(rand());
   passed &=
-      test<TestCastNum + 2, T, 1, 4, 16, 2, false, DS, L1H, L3H, true>(rand());
+      test<TestCastNum + 2, T, 1, 4, 16, 2, false, DS, L1H, L2H, true>(rand());
   passed &=
-      test<TestCastNum + 3, T, 1, 4, 4, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<TestCastNum + 4, T, 1, 1, 1, 1, false, DS, L1H, L3H, true>(1);
-  passed &= test<TestCastNum + 5, T, 2, 1, 1, 1, false, DS, L1H, L3H, true>(1);
+      test<TestCastNum + 3, T, 1, 4, 4, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<TestCastNum + 4, T, 1, 1, 1, 1, false, DS, L1H, L2H, true>(1);
+  passed &= test<TestCastNum + 5, T, 2, 1, 1, 1, false, DS, L1H, L2H, true>(1);
 
   // transpose
-  passed &= test<TestCastNum + 8, T, 1, 4, 1, 32, true, DS, L1H, L3H, true>();
-  passed &= test<TestCastNum + 9, T, 2, 2, 1, 16, true, DS, L1H, L3H, true>();
-  passed &= test<TestCastNum + 10, T, 4, 4, 1, 4, true, DS, L1H, L3H, true>();
+  passed &= test<TestCastNum + 8, T, 1, 4, 1, 32, true, DS, L1H, L2H, true>();
+  passed &= test<TestCastNum + 9, T, 2, 2, 1, 16, true, DS, L1H, L2H, true>();
+  passed &= test<TestCastNum + 10, T, 4, 4, 1, 4, true, DS, L1H, L2H, true>();
   return passed;
 }
 
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u64.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u64.cpp
index 84a14bad846d0..170bc500e885d 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u64.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u64.cpp
@@ -15,26 +15,26 @@ constexpr uint32_t seed = 198;
 constexpr lsc_data_size DS = lsc_data_size::u64;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 template <int TestCastNum, typename T> bool tests() {
   bool passed = true;
   // non transpose
   passed &=
-      test<TestCastNum, T, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand());
+      test<TestCastNum, T, 1, 4, 32, 1, false, DS, L1H, L2H, true>(rand());
   passed &=
-      test<TestCastNum + 1, T, 1, 4, 32, 2, false, DS, L1H, L3H, true>(rand());
+      test<TestCastNum + 1, T, 1, 4, 32, 2, false, DS, L1H, L2H, true>(rand());
   passed &=
-      test<TestCastNum + 2, T, 1, 4, 16, 2, false, DS, L1H, L3H, true>(rand());
+      test<TestCastNum + 2, T, 1, 4, 16, 2, false, DS, L1H, L2H, true>(rand());
   passed &=
-      test<TestCastNum + 3, T, 1, 4, 4, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<TestCastNum + 4, T, 1, 1, 1, 1, false, DS, L1H, L3H, true>(1);
-  passed &= test<TestCastNum + 5, T, 2, 1, 1, 1, false, DS, L1H, L3H, true>(1);
+      test<TestCastNum + 3, T, 1, 4, 4, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<TestCastNum + 4, T, 1, 1, 1, 1, false, DS, L1H, L2H, true>(1);
+  passed &= test<TestCastNum + 5, T, 2, 1, 1, 1, false, DS, L1H, L2H, true>(1);
 
   // transpose
-  passed &= test<TestCastNum + 8, T, 1, 4, 1, 32, true, DS, L1H, L3H, true>();
-  passed &= test<TestCastNum + 9, T, 2, 2, 1, 16, true, DS, L1H, L3H, true>();
-  passed &= test<TestCastNum + 10, T, 4, 4, 1, 4, true, DS, L1H, L3H, true>();
+  passed &= test<TestCastNum + 8, T, 1, 4, 1, 32, true, DS, L1H, L2H, true>();
+  passed &= test<TestCastNum + 9, T, 2, 2, 1, 16, true, DS, L1H, L2H, true>();
+  passed &= test<TestCastNum + 10, T, 4, 4, 1, 4, true, DS, L1H, L2H, true>();
   return passed;
 }
 
diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp
index 4b7db2784163a..15721094223a1 100644
--- a/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp
+++ b/sycl/test-e2e/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp
@@ -15,19 +15,19 @@ constexpr uint32_t seed = 196;
 constexpr lsc_data_size DS = lsc_data_size::u8u32;
 
 constexpr cache_hint L1H = cache_hint::cached;
-constexpr cache_hint L3H = cache_hint::uncached;
+constexpr cache_hint L2H = cache_hint::uncached;
 
 int main(void) {
   srand(seed);
   bool passed = true;
 
   // non-transpose
-  passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L3H, true>(rand());
-  passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L3H, true>(rand());
+  passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L2H, true>(rand());
+  passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L2H, true>(rand());
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
   return passed ? 0 : 1;
diff --git a/sycl/test/esimd/check_lsc.cpp b/sycl/test/esimd/check_lsc.cpp
index 4a211b7a7c2d9..e3f7ba939a7c4 100644
--- a/sycl/test/esimd/check_lsc.cpp
+++ b/sycl/test/esimd/check_lsc.cpp
@@ -18,23 +18,23 @@ using namespace sycl;
 // --- Postive tests.
 
 template <class T, int BLOCK_WIDTH, int BLOCK_HEIGHT, int NUM_BLOCKS,
-          bool TRANSPOSE, bool TRANSFORM, cache_hint L1H, cache_hint L3H,
+          bool TRANSPOSE, bool TRANSFORM, cache_hint L1H, cache_hint L2H,
           int N = __ESIMD_EDNS::get_lsc_block_2d_data_size<
               T, NUM_BLOCKS, BLOCK_HEIGHT, BLOCK_WIDTH, TRANSPOSE, TRANSFORM>()>
 SYCL_EXTERNAL auto test_load(T *ptr, int width, int height,
                              int pitch) SYCL_ESIMD_FUNCTION {
   return lsc_load_2d<T, BLOCK_WIDTH, BLOCK_HEIGHT, NUM_BLOCKS, TRANSPOSE,
-                     TRANSFORM, L1H, L3H>(
+                     TRANSFORM, L1H, L2H>(
       ptr, width * sizeof(T) - 1, height - 1, pitch * sizeof(T) - 1, 0, 0);
 }
 
 template <class T, int BLOCK_WIDTH, int BLOCK_HEIGHT, int NUM_BLOCKS,
-          cache_hint L1H, cache_hint L3H,
+          cache_hint L1H, cache_hint L2H,
           int N = __ESIMD_EDNS::get_lsc_block_2d_data_size<
               T, NUM_BLOCKS, BLOCK_HEIGHT, BLOCK_WIDTH, false, false>()>
 SYCL_EXTERNAL void test_store(T *ptr, simd<T, N> v, int width, int height,
                               int pitch) SYCL_ESIMD_FUNCTION {
-  lsc_store_2d<T, BLOCK_WIDTH, BLOCK_HEIGHT, L1H, L3H>(
+  lsc_store_2d<T, BLOCK_WIDTH, BLOCK_HEIGHT, L1H, L2H>(
       ptr, width * sizeof(T) - 1, height - 1, pitch * sizeof(T) - 1, 0, 0, v);
 }