Skip to content

Commit 657f50b

Browse files
authored
Fix typos, rename types, and add null_probability benchmark axis for distinct (rapidsai#17546)
This PR addresses several minor issues discovered while working on rapidsai#17467:

- Corrected a typo where `RowHasher` should have been `RowEqual`
- Renamed `hash_set_type` to `distinct_set_t`
- Added a `null_probability` benchmark axis for the distinct benchmark, similar to other stream compaction benchmarks

Authors:
- Yunsong Wang (https://github.com/PointKernel)

Approvers:
- Muhammad Haseeb (https://github.com/mhaseeb123)
- Vyas Ramasubramani (https://github.com/vyasr)

URL: rapidsai#17546
1 parent 5306eca commit 657f50b

File tree

4 files changed

+18
-14
lines changed

4 files changed

+18
-14
lines changed

cpp/benchmarks/stream_compaction/distinct.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
3434
cudf::size_type const num_rows = state.get_int64("NumRows");
3535
auto const keep = get_keep(state.get_string("keep"));
3636
cudf::size_type const cardinality = state.get_int64("cardinality");
37+
auto const null_probability = state.get_float64("null_probability");
3738

3839
if (cardinality > num_rows) {
3940
state.skip("cardinality > num_rows");
@@ -42,7 +43,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
4243

4344
data_profile profile = data_profile_builder()
4445
.cardinality(cardinality)
45-
.null_probability(0.01)
46+
.null_probability(null_probability)
4647
.distribution(cudf::type_to_id<Type>(),
4748
distribution_id::UNIFORM,
4849
static_cast<Type>(0),
@@ -65,6 +66,7 @@ using data_type = nvbench::type_list<int32_t, int64_t>;
6566
NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type))
6667
.set_name("distinct")
6768
.set_type_axes_names({"Type"})
69+
.add_float64_axis("null_probability", {0.01})
6870
.add_string_axis("keep", {"any", "first", "last", "none"})
6971
.add_int64_axis("cardinality", {100, 100'000, 10'000'000, 1'000'000'000})
7072
.add_int64_axis("NumRows", {100, 100'000, 10'000'000, 1'000'000'000});

cpp/src/stream_compaction/distinct.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,8 +95,8 @@ rmm::device_uvector<size_type> distinct_indices(table_view const& input,
9595
auto const row_equal = cudf::experimental::row::equality::self_comparator(preprocessed_input);
9696

9797
auto const helper_func = [&](auto const& d_equal) {
98-
using RowHasher = std::decay_t<decltype(d_equal)>;
99-
auto set = hash_set_type<RowHasher>{
98+
using RowEqual = std::decay_t<decltype(d_equal)>;
99+
auto set = distinct_set_t<RowEqual>{
100100
num_rows,
101101
0.5, // desired load factor
102102
cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL},

cpp/src/stream_compaction/distinct_helpers.cu

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
2121

2222
namespace cudf::detail {
2323

24-
template <typename RowHasher>
25-
rmm::device_uvector<size_type> reduce_by_row(hash_set_type<RowHasher>& set,
24+
template <typename RowEqual>
25+
rmm::device_uvector<size_type> reduce_by_row(distinct_set_t<RowEqual>& set,
2626
size_type num_rows,
2727
duplicate_keep_option keep,
2828
rmm::cuda_stream_view stream,
@@ -100,7 +100,7 @@ rmm::device_uvector<size_type> reduce_by_row(hash_set_type<RowHasher>& set,
100100
}
101101

102102
template rmm::device_uvector<size_type> reduce_by_row(
103-
hash_set_type<cudf::experimental::row::equality::device_row_comparator<
103+
distinct_set_t<cudf::experimental::row::equality::device_row_comparator<
104104
false,
105105
cudf::nullate::DYNAMIC,
106106
cudf::experimental::row::equality::nan_equal_physical_equality_comparator>>& set,
@@ -110,7 +110,7 @@ template rmm::device_uvector<size_type> reduce_by_row(
110110
rmm::device_async_resource_ref mr);
111111

112112
template rmm::device_uvector<size_type> reduce_by_row(
113-
hash_set_type<cudf::experimental::row::equality::device_row_comparator<
113+
distinct_set_t<cudf::experimental::row::equality::device_row_comparator<
114114
true,
115115
cudf::nullate::DYNAMIC,
116116
cudf::experimental::row::equality::nan_equal_physical_equality_comparator>>& set,
@@ -120,7 +120,7 @@ template rmm::device_uvector<size_type> reduce_by_row(
120120
rmm::device_async_resource_ref mr);
121121

122122
template rmm::device_uvector<size_type> reduce_by_row(
123-
hash_set_type<cudf::experimental::row::equality::device_row_comparator<
123+
distinct_set_t<cudf::experimental::row::equality::device_row_comparator<
124124
false,
125125
cudf::nullate::DYNAMIC,
126126
cudf::experimental::row::equality::physical_equality_comparator>>& set,
@@ -130,7 +130,7 @@ template rmm::device_uvector<size_type> reduce_by_row(
130130
rmm::device_async_resource_ref mr);
131131

132132
template rmm::device_uvector<size_type> reduce_by_row(
133-
hash_set_type<cudf::experimental::row::equality::device_row_comparator<
133+
distinct_set_t<cudf::experimental::row::equality::device_row_comparator<
134134
true,
135135
cudf::nullate::DYNAMIC,
136136
cudf::experimental::row::equality::physical_equality_comparator>>& set,

cpp/src/stream_compaction/distinct_helpers.hpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,12 +47,12 @@ auto constexpr reduction_init_value(duplicate_keep_option keep)
4747
}
4848
}
4949

50-
template <typename RowHasher>
51-
using hash_set_type =
50+
template <typename RowEqual>
51+
using distinct_set_t =
5252
cuco::static_set<size_type,
5353
cuco::extent<int64_t>,
5454
cuda::thread_scope_device,
55-
RowHasher,
55+
RowEqual,
5656
cuco::linear_probing<1,
5757
cudf::experimental::row::hash::device_row_hasher<
5858
cudf::hashing::detail::default_hash,
@@ -79,6 +79,8 @@ using hash_set_type =
7979
* the `reduction_init_value()` function. Then, the reduction result for each row group is written
8080
* into the output array at the index of an unspecified row in the group.
8181
*
82+
* @tparam RowEqual The type of row equality comparator
83+
*
8284
* @param set The auxiliary set to perform reduction
8385
* @param set_size The number of elements in set
8486
* @param num_rows The number of all input rows
@@ -87,8 +89,8 @@ using hash_set_type =
8789
* @param mr Device memory resource used to allocate the returned vector
8890
* @return A device_uvector containing the output indices
8991
*/
90-
template <typename RowHasher>
91-
rmm::device_uvector<size_type> reduce_by_row(hash_set_type<RowHasher>& set,
92+
template <typename RowEqual>
93+
rmm::device_uvector<size_type> reduce_by_row(distinct_set_t<RowEqual>& set,
9294
size_type num_rows,
9395
duplicate_keep_option keep,
9496
rmm::cuda_stream_view stream,

0 commit comments

Comments
 (0)