Skip to content

Commit

Permalink
[SYCL][ESIMD] Implement accessor based prefetch API that accepting co…
Browse files Browse the repository at this point in the history
…mpile time properties (#12878)

Co-authored-by: Nick Sarnie <sarnex@users.noreply.github.com>
  • Loading branch information
fineg74 and sarnex authored Mar 6, 2024
1 parent e4d2873 commit 656b8be
Show file tree
Hide file tree
Showing 8 changed files with 864 additions and 70 deletions.
28 changes: 28 additions & 0 deletions sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,34 @@ __ESIMD_INTRIN void __esimd_lsc_prefetch_stateless(
__ESIMD_DNS::simd_mask_storage_t<N> pred,
__ESIMD_DNS::vector_type_t<uintptr_t, N> addrs) __ESIMD_INTRIN_END;

/// Surface-based prefetch gather.
/// Supported platforms: DG2, PVC
///
/// Prefetches elements located at surface.
///
/// @tparam Ty is element type.
/// @tparam L1H is L1 cache hint.
/// @tparam L2H is L2 cache hint.
/// @tparam AddressScale is the address scale.
/// @tparam ImmOffset is the immediate offset added to each address.
/// @tparam DS is the data size.
/// @tparam VS is the number of elements to load per address.
/// @tparam Transposed indicates if the data is transposed during the transfer.
/// @tparam N is the SIMD size of operation (the number of addresses to access)
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
/// @param pred is predicates.
/// @param offsets is the zero-based offsets in bytes.
/// @param surf_ind is the surface index.
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
__ESIMD_DNS::lsc_vector_size VS,
__ESIMD_DNS::lsc_data_order Transposed, int N,
typename SurfIndAliasTy>
__ESIMD_INTRIN void
__esimd_lsc_prefetch_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;

// Read a block of data from SLM at the given offset.
template <typename Ty, int N, size_t Align>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
Expand Down
460 changes: 460 additions & 0 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -62,34 +62,6 @@ __ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
__ESIMD_DNS::vector_type_t<Ty, N> msg_var,
uint16_t pred = 1) __ESIMD_INTRIN_END;

/// Surface-based prefetch gather.
/// Supported platforms: DG2, PVC
///
/// Prefetches elements located at surface.
///
/// @tparam Ty is element type.
/// @tparam L1H is L1 cache hint.
/// @tparam L2H is L2 cache hint.
/// @tparam AddressScale is the address scale.
/// @tparam ImmOffset is the immediate offset added to each address.
/// @tparam DS is the data size.
/// @tparam VS is the number of elements to load per address.
/// @tparam Transposed indicates if the data is transposed during the transfer.
/// @tparam N is the SIMD size of operation (the number of addresses to access)
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
/// @param pred is predicates.
/// @param offsets is the zero-based offsets in bytes.
/// @param surf_ind is the surface index.
template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L2H,
uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
__ESIMD_EDNS::lsc_vector_size VS,
__ESIMD_EDNS::lsc_data_order _Transposed, int N,
typename SurfIndAliasTy>
__ESIMD_INTRIN void
__esimd_lsc_prefetch_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;

/// 2D USM pointer block load.
/// Supported platforms: PVC
///
Expand Down
45 changes: 6 additions & 39 deletions sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,30 +1293,13 @@ template <typename T, int NElts = 1,
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
lsc_prefetch(AccessorTy acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
__ESIMD_NS::simd<uint64_t, N> offsets,
#else
__ESIMD_NS::simd<uint32_t, N> offsets,
#endif
__ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
__ESIMD_NS::simd_mask<N> pred = 1) {
#ifdef __ESIMD_FORCE_STATELESS_MEM
return lsc_prefetch<T, NElts, DS, L1H, L2H>(
__ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred);
lsc_prefetch<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
offsets, pred);
#else
detail::check_lsc_vector_size<NElts>();
detail::check_lsc_data_size<T, DS>();
detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
constexpr uint16_t _AddressScale = 1;
constexpr int _ImmOffset = 0;
constexpr lsc_data_size _DS =
detail::expand_data_size(detail::finalize_data_size<T, DS>());
constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<NElts>();
constexpr detail::lsc_data_order _Transposed =
detail::lsc_data_order::nontranspose;
using MsgT = typename detail::lsc_expand_type<T>::type;
auto si = __ESIMD_NS::get_surface_index(acc);
__esimd_lsc_prefetch_bti<MsgT, L1H, L2H, _AddressScale, _ImmOffset, _DS, _VS,
_Transposed, N>(pred.data(), offsets.data(), si);
__ESIMD_DNS::prefetch_impl<T, NElts, DS, L1H, L2H>(acc, offsets, pred);
#endif
}

Expand Down Expand Up @@ -1362,24 +1345,8 @@ lsc_prefetch(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset) {
lsc_prefetch<T, NElts, DS, L1H, L2H>(
__ESIMD_DNS::accessorToPointer<T>(acc, offset));
#else
detail::check_lsc_vector_size<NElts>();
detail::check_lsc_data_size<T, DS>();
detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
constexpr uint16_t _AddressScale = 1;
constexpr int _ImmOffset = 0;
constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
static_assert(
_DS == lsc_data_size::u32 || _DS == lsc_data_size::u64,
"Transposed prefetch is supported only for data size u32 or u64");
constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<NElts>();
constexpr detail::lsc_data_order _Transposed =
detail::lsc_data_order::transpose;
constexpr int N = 1;
__ESIMD_NS::simd_mask<N> pred = 1;
__ESIMD_NS::simd<uint32_t, N> offsets = offset;
auto si = __ESIMD_NS::get_surface_index(acc);
__esimd_lsc_prefetch_bti<T, L1H, L2H, _AddressScale, _ImmOffset, _DS, _VS,
_Transposed, N>(pred.data(), offsets.data(), si);
__ESIMD_NS::simd_mask<1> Mask = 1;
__ESIMD_DNS::prefetch_impl<T, NElts, DS, L1H, L2H>(acc, offset, Mask);
#endif
}

Expand Down
Loading

0 comments on commit 656b8be

Please sign in to comment.