Skip to content

Commit

Permalink
[ESIMD] Fix slm_atomic_update() implementation for double type (#12337)
Browse files Browse the repository at this point in the history
For the double type, the GenX intrinsic expects double vectors directly,
without bit-casting them to integer types as is done for other types.

This fix enables FMAX/FMIN/FCMPXCHG slm_atomic_update() for the double type.
It requires a fairly recent GPU driver.

Signed-off-by: Klochkov, Vyacheslav N <vyacheslav.n.klochkov@intel.com>
  • Loading branch information
v-klochkov authored Jan 10, 2024
1 parent d3a5f1d commit c7549f9
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 19 deletions.
48 changes: 31 additions & 17 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4089,15 +4089,22 @@ slm_atomic_update_impl(simd<uint32_t, N> offsets, simd<T, N> src0,
constexpr lsc_data_size EDS = expand_data_size(finalize_data_size<T, DS>());
constexpr lsc_vector_size VS = to_lsc_vector_size<1>();
constexpr lsc_data_order Transposed = lsc_data_order::nontranspose;
using MsgT = typename lsc_expand_type<T>::type;
constexpr int IOp = lsc_to_internal_atomic_op<T, Op>();
simd<MsgT, N> Msg_data = lsc_format_input<MsgT>(src0);
simd<MsgT, N> Tmp =
__esimd_lsc_xatomic_slm_1<MsgT, IOp, cache_hint::none, cache_hint::none,
AddressScale, ImmOffset, EDS, VS, Transposed,
N>(pred.data(), offsets.data(),
Msg_data.data());
return lsc_format_ret<T>(Tmp);
if constexpr (std::is_same_v<T, double>) {
return __esimd_lsc_xatomic_slm_1<T, IOp, cache_hint::none, cache_hint::none,
AddressScale, ImmOffset, EDS, VS,
Transposed, N>(pred.data(), offsets.data(),
src0.data());
} else {
using MsgT = typename lsc_expand_type<T>::type;
simd<MsgT, N> Msg_data = lsc_format_input<MsgT>(src0);
simd<MsgT, N> Tmp =
__esimd_lsc_xatomic_slm_1<MsgT, IOp, cache_hint::none, cache_hint::none,
AddressScale, ImmOffset, EDS, VS, Transposed,
N>(pred.data(), offsets.data(),
Msg_data.data());
return lsc_format_ret<T>(Tmp);
}
}

/// SLM atomic.
Expand Down Expand Up @@ -4126,16 +4133,23 @@ __ESIMD_API simd<T, N> slm_atomic_update_impl(simd<uint32_t, N> offsets,
constexpr lsc_data_size EDS = expand_data_size(finalize_data_size<T, DS>());
constexpr lsc_vector_size VS = to_lsc_vector_size<1>();
constexpr lsc_data_order Transposed = lsc_data_order::nontranspose;
using MsgT = typename lsc_expand_type<T>::type;
constexpr int IOp = lsc_to_internal_atomic_op<T, Op>();
simd<MsgT, N> Msg_data0 = lsc_format_input<MsgT>(src0);
simd<MsgT, N> Msg_data1 = lsc_format_input<MsgT>(src1);
simd<MsgT, N> Tmp =
__esimd_lsc_xatomic_slm_2<MsgT, IOp, cache_hint::none, cache_hint::none,
AddressScale, ImmOffset, EDS, VS, Transposed,
N>(pred.data(), offsets.data(),
Msg_data0.data(), Msg_data1.data());
return lsc_format_ret<T>(Tmp);
if constexpr (std::is_same_v<T, double>) {
return __esimd_lsc_xatomic_slm_2<T, IOp, cache_hint::none, cache_hint::none,
AddressScale, ImmOffset, EDS, VS,
Transposed, N>(pred.data(), offsets.data(),
src0.data(), src1.data());
} else {
using MsgT = typename lsc_expand_type<T>::type;
simd<MsgT, N> Msg_data0 = lsc_format_input<MsgT>(src0);
simd<MsgT, N> Msg_data1 = lsc_format_input<MsgT>(src1);
simd<MsgT, N> Tmp =
__esimd_lsc_xatomic_slm_2<MsgT, IOp, cache_hint::none, cache_hint::none,
AddressScale, ImmOffset, EDS, VS, Transposed,
N>(pred.data(), offsets.data(),
Msg_data0.data(), Msg_data1.data());
return lsc_format_ret<T>(Tmp);
}
}

} // namespace detail
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -612,8 +612,7 @@ bool test_fp_types(queue q) {

if constexpr (Features == TestFeatures::DG2 ||
Features == TestFeatures::PVC) {
// TODO: fmin/max for double does not pass validation likely due to
// a driver bug. fcmpwr is hanging.
// TODO: fmin/fmax/fcmpxchg for double requires a newer GPU driver.
if constexpr (!std::is_same_v<Op<double, N>, ImplLSCFmax<double, N>> &&
!std::is_same_v<Op<double, N>, ImplLSCFmin<double, N>> &&
!std::is_same_v<Op<double, N>, ImplLSCFcmpwr<double, N>>) {
Expand Down

0 comments on commit c7549f9

Please sign in to comment.