Skip to content

Commit

Permalink
add comments and minor renaming
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandr-Solovev committed Dec 17, 2024
1 parent 917fa3d commit 06d9f82
Show file tree
Hide file tree
Showing 13 changed files with 200 additions and 150 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ std::int32_t most_frequent_element(const std::atomic<std::int32_t> *components,
std::int32_t *rnd_vertex_ids = allocate(vertex_allocator, samples_count);

dal::backend::primitives::host_engine eng;
dal::backend::primitives::uniform_cpu<std::int32_t>(samples_count,
rnd_vertex_ids,
eng,
0,
vertex_count);
dal::backend::primitives::uniform<std::int32_t>(samples_count,
rnd_vertex_ids,
eng,
0,
vertex_count);

std::int32_t *root_sample_counts = allocate(vertex_allocator, vertex_count);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,11 @@ sycl::event train_kernel_hist_impl<Float, Bin, Index, Task>::gen_initial_tree_or
for (Index node_idx = 0; node_idx < node_count; ++node_idx) {
Index* gen_row_idx_global_ptr =
selected_row_global_ptr + ctx.selected_row_total_count_ * node_idx;
pr::uniform_cpu<Index>(ctx.selected_row_total_count_,
gen_row_idx_global_ptr,
rng_engine_list[engine_offset + node_idx],
0,
ctx.row_total_count_);
pr::uniform<Index>(ctx.selected_row_total_count_,
gen_row_idx_global_ptr,
rng_engine_list[engine_offset + node_idx],
0,
ctx.row_total_count_);

if (ctx.distr_mode_) {
Index* node_ptr = node_list_ptr + node_idx * impl_const_t::node_prop_count_;
Expand Down Expand Up @@ -485,7 +485,7 @@ train_kernel_hist_impl<Float, Bin, Index, Task>::gen_feature_list(
auto tree_map_ptr = node_vs_tree_map_list_host.get_mutable_data();
if (ctx.selected_ftr_count_ != ctx.column_count_) {
for (Index node = 0; node < node_count; ++node) {
pr::uniform_without_replacement_cpu<Index>(
pr::uniform_without_replacement<Index>(
ctx.selected_ftr_count_,
selected_features_host_ptr + node * ctx.selected_ftr_count_,
selected_features_host_ptr + (node + 1) * ctx.selected_ftr_count_,
Expand Down Expand Up @@ -534,11 +534,11 @@ train_kernel_hist_impl<Float, Bin, Index, Task>::gen_random_thresholds(

// Generate random bins for selected features
for (Index node = 0; node < node_count; ++node) {
pr::uniform_cpu<Float>(ctx.selected_ftr_count_,
random_bins_host_ptr + node * ctx.selected_ftr_count_,
rng_engine_list[tree_map_ptr[node]],
0.0f,
1.0f);
pr::uniform<Float>(ctx.selected_ftr_count_,
random_bins_host_ptr + node * ctx.selected_ftr_count_,
rng_engine_list[tree_map_ptr[node]],
0.0f,
1.0f);
}
auto event_rnd_generate =
random_bins_com.assign_from_host(queue_, random_bins_host_ptr, random_bins_com.get_count());
Expand Down Expand Up @@ -1658,9 +1658,9 @@ sycl::event train_kernel_hist_impl<Float, Bin, Index, Task>::compute_results(
const Float div1 = Float(1) / Float(built_tree_count + tree_idx_in_block + 1);

for (Index column_idx = 0; column_idx < ctx.column_count_; ++column_idx) {
pr::shuffle_cpu<Index>(oob_row_count,
permutation_ptr,
engine_arr[built_tree_count + tree_idx_in_block]);
pr::shuffle<Index>(oob_row_count,
permutation_ptr,
engine_arr[built_tree_count + tree_idx_in_block]);
const Float oob_err_perm = compute_oob_error_perm(ctx,
model_manager,
data_host,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ inline Float move_nodes(const dal::preview::detail::topology<IndexType>& t,
ld.random_order[index] = index;
}
// random shuffle
uniform_cpu<std::int32_t>(t._vertex_count, ld.index, ld.eng, 0, t._vertex_count);
uniform<std::int32_t>(t._vertex_count, ld.index, ld.eng, 0, t._vertex_count);
for (std::int64_t index = 0; index < t._vertex_count; ++index) {
std::swap(ld.random_order[index], ld.random_order[ld.index[index]]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ class logloss_test : public te::float_algo_fixture<std::tuple_element_t<0, Param

for (std::int32_t ij = 0; ij < num_checks; ++ij) {
primitives::host_engine eng(2007 + dim * num_checks + ij);
pr::uniform_cpu<float_t>(dim, vec_host.get_mutable_data(), eng, -1.0, 1.0);
pr::uniform<float_t>(dim, vec_host.get_mutable_data(), eng, -1.0, 1.0);
auto vec_gpu = vec_host.to_device(this->get_queue());
auto out_vector =
ndarray<float_t, 1>::empty(this->get_queue(), { dim }, sycl::usm::alloc::device);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class logloss_spmd_test : public logloss_test<Param> {
host_engine eng(2007 + dim * num_checks + ij);
vecs_host[ij] =
(ndarray<float_t, 1>::empty(this->get_queue(), { dim }, sycl::usm::alloc::host));
uniform_cpu<float_t>(dim, vecs_host[ij].get_mutable_data(), eng, -1.0, 1.0);
uniform<float_t>(dim, vecs_host[ij].get_mutable_data(), eng, -1.0, 1.0);
vecs_gpu[ij] = vecs_host[ij].to_device(this->get_queue());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class cg_solver_test : public te::float_algo_fixture<Param> {
b_host_ = ndarray<float_t, 1>::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);

primitives::host_engine eng(4014 + n_);
primitives::uniform_cpu<float_t>(n_, x_host_.get_mutable_data(), eng, -1.0, 1.0);
primitives::uniform<float_t>(n_, x_host_.get_mutable_data(), eng, -1.0, 1.0);

create_stable_matrix(this->get_queue(), A_host_);

Expand Down
4 changes: 2 additions & 2 deletions cpp/oneapi/dal/backend/primitives/optimizers/test/fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ void create_stable_matrix(sycl::queue& queue,
auto eigen_values = ndarray<Float, 1>::empty(queue, { n }, sycl::usm::alloc::host);
primitives::host_engine eng(2007 + n);

primitives::uniform_cpu<Float>(n * n, J.get_mutable_data(), eng, -1.0, 1.0);
primitives::uniform_cpu<Float>(n, eigen_values.get_mutable_data(), eng, bottom_eig, top_eig);
primitives::uniform<Float>(n * n, J.get_mutable_data(), eng, -1.0, 1.0);
primitives::uniform<Float>(n, eigen_values.get_mutable_data(), eng, bottom_eig, top_eig);

// orthogonalize matrix J
gram_schmidt(J);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ class newton_cg_test : public te::float_algo_fixture<Param> {
ndarray<float_t, 1>::empty(this->get_queue(), { p_ + 1 }, sycl::usm::alloc::host);

primitives::host_engine eng(2007 + n);
primitives::uniform_cpu<float_t>(n_ * p_, X_host.get_mutable_data(), eng, -10.0, 10.0);
primitives::uniform_cpu<float_t>(p_ + 1, params_host.get_mutable_data(), eng, -5.0, 5.0);
primitives::uniform<float_t>(n_ * p_, X_host.get_mutable_data(), eng, -10.0, 10.0);
primitives::uniform<float_t>(p_ + 1, params_host.get_mutable_data(), eng, -5.0, 5.0);
for (std::int64_t i = 0; i < n_; ++i) {
float_t val = 0;
for (std::int64_t j = 0; j < p_; ++j) {
Expand Down Expand Up @@ -144,7 +144,7 @@ class newton_cg_test : public te::float_algo_fixture<Param> {
auto b_host = ndarray<float_t, 1>::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);

primitives::engine eng(4014 + n_);
uniform_cpu<float_t>(n_, solution_.get_mutable_data(), eng, -1.0, 1.0);
uniform<float_t>(n_, solution_.get_mutable_data(), eng, -1.0, 1.0);

create_stable_matrix(this->get_queue(), A_host, float_t(0.1), float_t(5.0));

Expand All @@ -164,7 +164,7 @@ class newton_cg_test : public te::float_algo_fixture<Param> {
auto buffer = ndarray<float_t, 1>::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);

for (std::int32_t test_num = 0; test_num < 5; ++test_num) {
uniform_cpu<float_t>(n_, x_host.get_mutable_data(), eng, -1.0, 1.0);
uniform<float_t>(n_, x_host.get_mutable_data(), eng, -1.0, 1.0);
auto x_gpu = x_host.to_device(this->get_queue());
auto compute_event_vec = func_->update_x(x_gpu, true, {});
wait_or_pass(compute_event_vec).wait_and_throw();
Expand Down
15 changes: 15 additions & 0 deletions cpp/oneapi/dal/backend/primitives/rng/dpc_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,21 @@ struct dpc_engine_type<engine_method::philox4x32x10> {
using type = oneapi::mkl::rng::philox4x32x10;
};

/// A class that provides a unified interface for random number generation on both CPU and GPU devices.
///
/// This class serves as a wrapper for random number generators (RNGs) that supports different engine types,
/// enabling efficient random number generation on heterogeneous platforms using SYCL. It integrates a host
/// (CPU) engine and a device (GPU) engine, allowing operations to be executed seamlessly on the appropriate
/// device.
///
/// @tparam EngineType The RNG engine type to be used. Defaults to `engine_method::mt2203`.
///
/// The constructor takes the SYCL queue used to manage device operations (`queue`) and the
/// initial seed for the random number generator (`seed`, defaults to `777`).
///
/// The class provides functionality to skip ahead in the RNG sequence, retrieve engine states, and
/// manage host and device engines independently. Support for `skip_ahead` on GPU is currently limited for
/// some engine types.
template <engine_method EngineType = engine_method::mt2203>
class dpc_engine {
public:
Expand Down
11 changes: 11 additions & 0 deletions cpp/oneapi/dal/backend/primitives/rng/host_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@

namespace oneapi::dal::backend::primitives {

/// A class that provides an interface for random number generation on the host (CPU) only.
///
/// This class serves as a wrapper for host-based random number generators (RNGs), supporting multiple engine
/// types for flexible and efficient random number generation on CPU. It abstracts the underlying engine
/// implementation and provides an interface to manage and retrieve the engine's state.
///
/// @tparam EngineType The RNG engine type to be used. Defaults to `engine_method::mt2203`.
///
/// The constructor takes the initial seed for the random number generator (`seed`, defaults to `777`).
///
/// @note The class only supports host-based RNG and does not require a SYCL queue or device context.
template <engine_method EngineType = engine_method::mt2203>
class host_engine {
public:
Expand Down
72 changes: 36 additions & 36 deletions cpp/oneapi/dal/backend/primitives/rng/rng.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@
namespace oneapi::dal::backend::primitives {

template <typename Type, typename Size, engine_method EngineType>
void uniform_cpu(Size count, Type* dst, host_engine<EngineType>& host_engine, Type a, Type b) {
void uniform(Size count, Type* dst, host_engine<EngineType>& host_engine, Type a, Type b) {
auto state = host_engine.get_host_engine_state();
uniform_dispatcher::uniform_by_cpu<Type>(count, dst, state, a, b);
}

template <typename Type, typename Size, engine_method EngineType>
void uniform_without_replacement_cpu(Size count,
Type* dst,
Type* buffer,
host_engine<EngineType> host_engine,
Type a,
Type b) {
void uniform_without_replacement(Size count,
Type* dst,
Type* buffer,
host_engine<EngineType> host_engine,
Type a,
Type b) {
auto state = host_engine.get_host_engine_state();
uniform_dispatcher::uniform_without_replacement_by_cpu<Type>(count, dst, buffer, state, a, b);
}
Expand All @@ -48,7 +48,7 @@ template <typename Type,
engine_method EngineType,
typename T = Type,
typename = std::enable_if_t<std::is_integral_v<T>>>
void shuffle_cpu(Size count, Type* dst, host_engine<EngineType> host_engine) {
void shuffle(Size count, Type* dst, host_engine<EngineType> host_engine) {
auto state = host_engine.get_host_engine_state();
Type idx[2];
for (Size i = 0; i < count; ++i) {
Expand All @@ -59,7 +59,7 @@ void shuffle_cpu(Size count, Type* dst, host_engine<EngineType> host_engine) {

#ifdef ONEDAL_DATA_PARALLEL
template <typename Type, typename Size, engine_method EngineType>
void uniform_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_, Type a, Type b) {
void uniform(Size count, Type* dst, dpc_engine<EngineType>& engine_, Type a, Type b) {
if (sycl::get_pointer_type(dst, engine_.get_queue().get_context()) ==
sycl::usm::alloc::device) {
throw domain_error(dal::detail::error_messages::unsupported_data_type());
Expand All @@ -70,12 +70,12 @@ void uniform_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_, Type a,
}

template <typename Type, typename Size, engine_method EngineType>
void uniform_without_replacement_cpu(Size count,
Type* dst,
Type* buffer,
dpc_engine<EngineType>& engine_,
Type a,
Type b) {
void uniform_without_replacement(Size count,
Type* dst,
Type* buffer,
dpc_engine<EngineType>& engine_,
Type a,
Type b) {
if (sycl::get_pointer_type(dst, engine_.get_queue().get_context()) ==
sycl::usm::alloc::device) {
throw domain_error(dal::detail::error_messages::unsupported_data_type());
Expand All @@ -90,7 +90,7 @@ template <typename Type,
engine_method EngineType,
typename T = Type,
typename = std::enable_if_t<std::is_integral_v<T>>>
void shuffle_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_) {
void shuffle(Size count, Type* dst, dpc_engine<EngineType>& engine_) {
if (sycl::get_pointer_type(dst, engine_.get_queue().get_context()) ==
sycl::usm::alloc::device) {
throw domain_error(dal::detail::error_messages::unsupported_data_type());
Expand All @@ -105,30 +105,30 @@ void shuffle_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_) {
}

template <typename Type, typename Size, engine_method EngineType>
void uniform_gpu(sycl::queue& queue,
Size count,
Type* dst,
dpc_engine<EngineType>& engine_,
Type a,
Type b,
const event_vector& deps = {});
void uniform(sycl::queue& queue,
Size count,
Type* dst,
dpc_engine<EngineType>& engine_,
Type a,
Type b,
const event_vector& deps = {});

template <typename Type, typename Size, engine_method EngineType>
void uniform_without_replacement_gpu(sycl::queue& queue,
Size count,
Type* dst,
Type* buffer,
dpc_engine<EngineType>& engine_,
Type a,
Type b,
const event_vector& deps = {});
void uniform_without_replacement(sycl::queue& queue,
Size count,
Type* dst,
Type* buffer,
dpc_engine<EngineType>& engine_,
Type a,
Type b,
const event_vector& deps = {});

template <typename Type, typename Size, engine_method EngineType>
void shuffle_gpu(sycl::queue& queue,
Size count,
Type* dst,
dpc_engine<EngineType>& engine_,
const event_vector& deps = {});
void shuffle(sycl::queue& queue,
Size count,
Type* dst,
dpc_engine<EngineType>& engine_,
const event_vector& deps = {});
#endif

}; // namespace oneapi::dal::backend::primitives
Loading

0 comments on commit 06d9f82

Please sign in to comment.