Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix performance issue in __serial_merge #2022

Merged
merged 20 commits into from
Jan 27, 2025
Merged
Changes from 14 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
6fe726b
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
98fb359
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
2343991
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
e32361e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
2f12ce8
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
3ed132c
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
67c793f
Revert "include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_me…
SergeyKopienko Jan 27, 2025
f2e33a4
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
2f99740
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
b88c6a6
Merge branch 'main' into dev/skopienko/fix_serial_merge_and_perf
SergeyKopienko Jan 27, 2025
243460c
Apply GitHUB clang format
SergeyKopienko Jan 27, 2025
2825d16
Update include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_mer…
SergeyKopienko Jan 27, 2025
c50d0bc
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
3678dd2
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
aaa9ddf
Simplify SFINAE in serial merge pull request (#2024)
dmitriy-sobolev Jan 27, 2025
ccce034
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
7288c5f
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
f28c9e6
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
136e42e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
c847f99
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h - …
SergeyKopienko Jan 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 35 additions & 4 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,25 @@ __find_start_point(const _Rng1& __rng1, const _Index __rng1_from, _Index __rng1_
return _split_point_t<_Index>{*__res, __index_sum - *__res + 1};
}

// Detection helper (preferred overload).
// Participates in overload resolution only when a conditional expression whose second and third operands
// have the types produced by __value_t for _Rng1 and _Rng2 is well-formed; otherwise substitution fails
// and the `long` overload of __can_use_ternary_op is selected instead.
// The unnamed `int` parameter makes this overload the better match when the caller passes an `int`.
// Returns std::true_type.
template <typename _Rng1, typename _Rng2, typename _value_t_rng1 = oneapi::dpl::__internal::__value_t<_Rng1>,
          typename _value_t_rng2 = oneapi::dpl::__internal::__value_t<_Rng2>>
constexpr auto
__can_use_ternary_op(int)
    // The probe expression is cast to void so that a user-provided operator, on the value type
    // can never be picked up by the comma below and alter the deduced return type.
    -> decltype(static_cast<void>(true ? std::declval<_value_t_rng1>() : std::declval<_value_t_rng2>()),
                std::true_type{})
{
    return {};
}

// Detection helper (fallback overload).
// Always viable; it takes `long`, so for an `int` argument it is chosen only when the `int` overload
// has been removed from the candidate set. Returns std::false_type.
template <typename _Rng1, typename _Rng2>
constexpr std::false_type
__can_use_ternary_op(long)
{
    return std::false_type{};
}

SergeyKopienko marked this conversation as resolved.
Show resolved Hide resolved
// Compile-time flag: true when __can_use_ternary_op<_Rng1, _Rng2> called with an `int` argument
// yields a type whose ::value is true, false otherwise.
// Declared `inline constexpr` (rather than `static`) so that this header-defined variable template
// is a single entity across all translation units instead of one internal-linkage copy per TU.
template <typename _Rng1, typename _Rng2>
inline constexpr bool __can_use_ternary_op_v = decltype(__can_use_ternary_op<_Rng1, _Rng2>(int{0}))::value;
SergeyKopienko marked this conversation as resolved.
Show resolved Hide resolved

// Do serial merge of the data from rng1 (starting from start1) and rng2 (starting from start2) and writing
// to rng3 (starting from start3) in 'chunk' steps, but do not exceed the total size of the sequences (n1 and n2)
template <typename _Rng1, typename _Rng2, typename _Rng3, typename _Index, typename _Compare>
Expand All @@ -156,11 +175,23 @@ __serial_merge(const _Rng1& __rng1, const _Rng2& __rng2, _Rng3& __rng3, const _I
// One of __rng1_idx_less_n1 and __rng2_idx_less_n2 should be true here
// because 1) we should fill output data with elements from one of the input ranges
// 2) we calculate __rng3_idx_end as std::min<_Index>(__rng1_size + __rng2_size, __chunk).
if (__rng1_idx_less_n1 && __rng2_idx_less_n2 && __comp(__rng2[__rng2_idx], __rng1[__rng1_idx]) ||
!__rng1_idx_less_n1)
__rng3[__rng3_idx] = __rng2[__rng2_idx++];
if constexpr (__can_use_ternary_op_v<_Rng1, _Rng2>)
{
// This implementation is required for performance optimization
__rng3[__rng3_idx] = (!__rng1_idx_less_n1 || __rng1_idx_less_n1 && __rng2_idx_less_n2 &&
__comp(__rng2[__rng2_idx], __rng1[__rng1_idx]))
? __rng2[__rng2_idx++]
: __rng1[__rng1_idx++];
}
else
__rng3[__rng3_idx] = __rng1[__rng1_idx++];
{
// TODO: investigate why the usual if-else is slower than the ternary operator
if (!__rng1_idx_less_n1 ||
__rng1_idx_less_n1 && __rng2_idx_less_n2 && __comp(__rng2[__rng2_idx], __rng1[__rng1_idx]))
__rng3[__rng3_idx] = __rng2[__rng2_idx++];
else
__rng3[__rng3_idx] = __rng1[__rng1_idx++];
}
}
}

Expand Down
Loading