add comments and minor renaming

uxlfoundation · Dec 17, 2024 · commit 06d9f82
1 parent 917fa3d

Showing 13 changed files with 200 additions and 150 deletions.
diff --git a/cpp/oneapi/dal/algo/connected_components/backend/cpu/vertex_partitioning_default_kernel.hpp b/cpp/oneapi/dal/algo/connected_components/backend/cpu/vertex_partitioning_default_kernel.hpp
@@ -91,11 +91,11 @@ std::int32_t most_frequent_element(const std::atomic<std::int32_t> *components,
     std::int32_t *rnd_vertex_ids = allocate(vertex_allocator, samples_count);
 
     dal::backend::primitives::host_engine eng;
-    dal::backend::primitives::uniform_cpu<std::int32_t>(samples_count,
-                                                        rnd_vertex_ids,
-                                                        eng,
-                                                        0,
-                                                        vertex_count);
+    dal::backend::primitives::uniform<std::int32_t>(samples_count,
+                                                    rnd_vertex_ids,
+                                                    eng,
+                                                    0,
+                                                    vertex_count);
 
     std::int32_t *root_sample_counts = allocate(vertex_allocator, vertex_count);
 

diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl_dpc.cpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl_dpc.cpp
@@ -398,11 +398,11 @@ sycl::event train_kernel_hist_impl<Float, Bin, Index, Task>::gen_initial_tree_or
         for (Index node_idx = 0; node_idx < node_count; ++node_idx) {
             Index* gen_row_idx_global_ptr =
                 selected_row_global_ptr + ctx.selected_row_total_count_ * node_idx;
-            pr::uniform_cpu<Index>(ctx.selected_row_total_count_,
-                                   gen_row_idx_global_ptr,
-                                   rng_engine_list[engine_offset + node_idx],
-                                   0,
-                                   ctx.row_total_count_);
+            pr::uniform<Index>(ctx.selected_row_total_count_,
+                               gen_row_idx_global_ptr,
+                               rng_engine_list[engine_offset + node_idx],
+                               0,
+                               ctx.row_total_count_);
 
             if (ctx.distr_mode_) {
                 Index* node_ptr = node_list_ptr + node_idx * impl_const_t::node_prop_count_;
@@ -485,7 +485,7 @@ train_kernel_hist_impl<Float, Bin, Index, Task>::gen_feature_list(
     auto tree_map_ptr = node_vs_tree_map_list_host.get_mutable_data();
     if (ctx.selected_ftr_count_ != ctx.column_count_) {
         for (Index node = 0; node < node_count; ++node) {
-            pr::uniform_without_replacement_cpu<Index>(
+            pr::uniform_without_replacement<Index>(
                 ctx.selected_ftr_count_,
                 selected_features_host_ptr + node * ctx.selected_ftr_count_,
                 selected_features_host_ptr + (node + 1) * ctx.selected_ftr_count_,
@@ -534,11 +534,11 @@ train_kernel_hist_impl<Float, Bin, Index, Task>::gen_random_thresholds(
 
     // Generate random bins for selected features
     for (Index node = 0; node < node_count; ++node) {
-        pr::uniform_cpu<Float>(ctx.selected_ftr_count_,
-                               random_bins_host_ptr + node * ctx.selected_ftr_count_,
-                               rng_engine_list[tree_map_ptr[node]],
-                               0.0f,
-                               1.0f);
+        pr::uniform<Float>(ctx.selected_ftr_count_,
+                           random_bins_host_ptr + node * ctx.selected_ftr_count_,
+                           rng_engine_list[tree_map_ptr[node]],
+                           0.0f,
+                           1.0f);
     }
     auto event_rnd_generate =
         random_bins_com.assign_from_host(queue_, random_bins_host_ptr, random_bins_com.get_count());
@@ -1658,9 +1658,9 @@ sycl::event train_kernel_hist_impl<Float, Bin, Index, Task>::compute_results(
             const Float div1 = Float(1) / Float(built_tree_count + tree_idx_in_block + 1);
 
             for (Index column_idx = 0; column_idx < ctx.column_count_; ++column_idx) {
-                pr::shuffle_cpu<Index>(oob_row_count,
-                                       permutation_ptr,
-                                       engine_arr[built_tree_count + tree_idx_in_block]);
+                pr::shuffle<Index>(oob_row_count,
+                                   permutation_ptr,
+                                   engine_arr[built_tree_count + tree_idx_in_block]);
                 const Float oob_err_perm = compute_oob_error_perm(ctx,
                                                                   model_manager,
                                                                   data_host,

diff --git a/cpp/oneapi/dal/algo/louvain/backend/cpu/vertex_partitioning_default_kernel.hpp b/cpp/oneapi/dal/algo/louvain/backend/cpu/vertex_partitioning_default_kernel.hpp
@@ -206,7 +206,7 @@ inline Float move_nodes(const dal::preview::detail::topology<IndexType>& t,
         ld.random_order[index] = index;
     }
     // random shuffle
-    uniform_cpu<std::int32_t>(t._vertex_count, ld.index, ld.eng, 0, t._vertex_count);
+    uniform<std::int32_t>(t._vertex_count, ld.index, ld.eng, 0, t._vertex_count);
     for (std::int64_t index = 0; index < t._vertex_count; ++index) {
         std::swap(ld.random_order[index], ld.random_order[ld.index[index]]);
     }

diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp
@@ -577,7 +577,7 @@ class logloss_test : public te::float_algo_fixture<std::tuple_element_t<0, Param
 
         for (std::int32_t ij = 0; ij < num_checks; ++ij) {
             primitives::host_engine eng(2007 + dim * num_checks + ij);
-            pr::uniform_cpu<float_t>(dim, vec_host.get_mutable_data(), eng, -1.0, 1.0);
+            pr::uniform<float_t>(dim, vec_host.get_mutable_data(), eng, -1.0, 1.0);
             auto vec_gpu = vec_host.to_device(this->get_queue());
             auto out_vector =
                 ndarray<float_t, 1>::empty(this->get_queue(), { dim }, sycl::usm::alloc::device);

diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp
@@ -105,7 +105,7 @@ class logloss_spmd_test : public logloss_test<Param> {
             host_engine eng(2007 + dim * num_checks + ij);
             vecs_host[ij] =
                 (ndarray<float_t, 1>::empty(this->get_queue(), { dim }, sycl::usm::alloc::host));
-            uniform_cpu<float_t>(dim, vecs_host[ij].get_mutable_data(), eng, -1.0, 1.0);
+            uniform<float_t>(dim, vecs_host[ij].get_mutable_data(), eng, -1.0, 1.0);
             vecs_gpu[ij] = vecs_host[ij].to_device(this->get_queue());
         }
 

diff --git a/cpp/oneapi/dal/backend/primitives/optimizers/test/cg_solver_dpc.cpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/cg_solver_dpc.cpp
@@ -44,7 +44,7 @@ class cg_solver_test : public te::float_algo_fixture<Param> {
         b_host_ = ndarray<float_t, 1>::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);
 
         primitives::host_engine eng(4014 + n_);
-        primitives::uniform_cpu<float_t>(n_, x_host_.get_mutable_data(), eng, -1.0, 1.0);
+        primitives::uniform<float_t>(n_, x_host_.get_mutable_data(), eng, -1.0, 1.0);
 
         create_stable_matrix(this->get_queue(), A_host_);
 

diff --git a/cpp/oneapi/dal/backend/primitives/optimizers/test/fixture.hpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/fixture.hpp
@@ -135,8 +135,8 @@ void create_stable_matrix(sycl::queue& queue,
     auto eigen_values = ndarray<Float, 1>::empty(queue, { n }, sycl::usm::alloc::host);
     primitives::host_engine eng(2007 + n);
 
-    primitives::uniform_cpu<Float>(n * n, J.get_mutable_data(), eng, -1.0, 1.0);
-    primitives::uniform_cpu<Float>(n, eigen_values.get_mutable_data(), eng, bottom_eig, top_eig);
+    primitives::uniform<Float>(n * n, J.get_mutable_data(), eng, -1.0, 1.0);
+    primitives::uniform<Float>(n, eigen_values.get_mutable_data(), eng, bottom_eig, top_eig);
 
     // orthogonalize matrix J
     gram_schmidt(J);

diff --git a/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp
@@ -58,8 +58,8 @@ class newton_cg_test : public te::float_algo_fixture<Param> {
             ndarray<float_t, 1>::empty(this->get_queue(), { p_ + 1 }, sycl::usm::alloc::host);
 
         primitives::host_engine eng(2007 + n);
-        primitives::uniform_cpu<float_t>(n_ * p_, X_host.get_mutable_data(), eng, -10.0, 10.0);
-        primitives::uniform_cpu<float_t>(p_ + 1, params_host.get_mutable_data(), eng, -5.0, 5.0);
+        primitives::uniform<float_t>(n_ * p_, X_host.get_mutable_data(), eng, -10.0, 10.0);
+        primitives::uniform<float_t>(p_ + 1, params_host.get_mutable_data(), eng, -5.0, 5.0);
         for (std::int64_t i = 0; i < n_; ++i) {
             float_t val = 0;
             for (std::int64_t j = 0; j < p_; ++j) {
@@ -144,7 +144,7 @@ class newton_cg_test : public te::float_algo_fixture<Param> {
         auto b_host = ndarray<float_t, 1>::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);
 
         primitives::engine eng(4014 + n_);
-        uniform_cpu<float_t>(n_, solution_.get_mutable_data(), eng, -1.0, 1.0);
+        uniform<float_t>(n_, solution_.get_mutable_data(), eng, -1.0, 1.0);
 
         create_stable_matrix(this->get_queue(), A_host, float_t(0.1), float_t(5.0));
 
@@ -164,7 +164,7 @@ class newton_cg_test : public te::float_algo_fixture<Param> {
         auto buffer = ndarray<float_t, 1>::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);
 
         for (std::int32_t test_num = 0; test_num < 5; ++test_num) {
-            uniform_cpu<float_t>(n_, x_host.get_mutable_data(), eng, -1.0, 1.0);
+            uniform<float_t>(n_, x_host.get_mutable_data(), eng, -1.0, 1.0);
             auto x_gpu = x_host.to_device(this->get_queue());
             auto compute_event_vec = func_->update_x(x_gpu, true, {});
             wait_or_pass(compute_event_vec).wait_and_throw();

diff --git a/cpp/oneapi/dal/backend/primitives/rng/dpc_engine.hpp b/cpp/oneapi/dal/backend/primitives/rng/dpc_engine.hpp
@@ -53,6 +53,21 @@ struct dpc_engine_type<engine_method::philox4x32x10> {
     using type = oneapi::mkl::rng::philox4x32x10;
 };
 
+/// A class that provides a unified interface for random number generation on CPU and GPU devices.
+///
+/// This class is a wrapper around random number generators (RNGs) and supports different engine
+/// types for random number generation on heterogeneous platforms using SYCL. It holds both a host
+/// (CPU) engine and a device (GPU) engine, so that generation can be carried out on the
+/// appropriate device.
+///
+/// @tparam EngineType The RNG engine type to be used. Defaults to `engine_method::mt2203`.
+///
+/// @param[in] queue Constructor argument: the SYCL queue used to manage device operations.
+/// @param[in] seed  Constructor argument: the initial seed for the generator. Defaults to `777`.
+///
+/// The class provides functionality to skip ahead in the RNG sequence, retrieve engine states, and
+/// manage host and device engines independently. Support for `skip_ahead` on GPU is currently
+/// limited for some engine types.
 template <engine_method EngineType = engine_method::mt2203>
 class dpc_engine {
 public:

diff --git a/cpp/oneapi/dal/backend/primitives/rng/host_engine.hpp b/cpp/oneapi/dal/backend/primitives/rng/host_engine.hpp
@@ -26,6 +26,17 @@
 
 namespace oneapi::dal::backend::primitives {
 
+/// A class that provides an interface for random number generation on the host (CPU) only.
+///
+/// This class is a wrapper around host-based random number generators (RNGs) and supports multiple
+/// engine types for random number generation on CPU. It hides the underlying engine
+/// implementation and provides an interface to manage and retrieve the engine's state.
+///
+/// @tparam EngineType The RNG engine type to be used. Defaults to `engine_method::mt2203`.
+///
+/// @param[in] seed  Constructor argument: the initial seed for the generator. Defaults to `777`.
+///
+/// @note The class only supports host-based RNG and does not require a SYCL queue or device context.
 template <engine_method EngineType = engine_method::mt2203>
 class host_engine {
 public:

diff --git a/cpp/oneapi/dal/backend/primitives/rng/rng.hpp b/cpp/oneapi/dal/backend/primitives/rng/rng.hpp
@@ -27,18 +27,18 @@
 namespace oneapi::dal::backend::primitives {
 
 template <typename Type, typename Size, engine_method EngineType>
-void uniform_cpu(Size count, Type* dst, host_engine<EngineType>& host_engine, Type a, Type b) {
+void uniform(Size count, Type* dst, host_engine<EngineType>& host_engine, Type a, Type b) {
     auto state = host_engine.get_host_engine_state();
     uniform_dispatcher::uniform_by_cpu<Type>(count, dst, state, a, b);
 }
 
 template <typename Type, typename Size, engine_method EngineType>
-void uniform_without_replacement_cpu(Size count,
-                                     Type* dst,
-                                     Type* buffer,
-                                     host_engine<EngineType> host_engine,
-                                     Type a,
-                                     Type b) {
+void uniform_without_replacement(Size count,
+                                 Type* dst,
+                                 Type* buffer,
+                                 host_engine<EngineType> host_engine,
+                                 Type a,
+                                 Type b) {
     auto state = host_engine.get_host_engine_state();
     uniform_dispatcher::uniform_without_replacement_by_cpu<Type>(count, dst, buffer, state, a, b);
 }
@@ -48,7 +48,7 @@ template <typename Type,
           engine_method EngineType,
           typename T = Type,
           typename = std::enable_if_t<std::is_integral_v<T>>>
-void shuffle_cpu(Size count, Type* dst, host_engine<EngineType> host_engine) {
+void shuffle(Size count, Type* dst, host_engine<EngineType> host_engine) {
     auto state = host_engine.get_host_engine_state();
     Type idx[2];
     for (Size i = 0; i < count; ++i) {
@@ -59,7 +59,7 @@ void shuffle_cpu(Size count, Type* dst, host_engine<EngineType> host_engine) {
 
 #ifdef ONEDAL_DATA_PARALLEL
 template <typename Type, typename Size, engine_method EngineType>
-void uniform_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_, Type a, Type b) {
+void uniform(Size count, Type* dst, dpc_engine<EngineType>& engine_, Type a, Type b) {
     if (sycl::get_pointer_type(dst, engine_.get_queue().get_context()) ==
         sycl::usm::alloc::device) {
         throw domain_error(dal::detail::error_messages::unsupported_data_type());
@@ -70,12 +70,12 @@ void uniform_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_, Type a,
 }
 
 template <typename Type, typename Size, engine_method EngineType>
-void uniform_without_replacement_cpu(Size count,
-                                     Type* dst,
-                                     Type* buffer,
-                                     dpc_engine<EngineType>& engine_,
-                                     Type a,
-                                     Type b) {
+void uniform_without_replacement(Size count,
+                                 Type* dst,
+                                 Type* buffer,
+                                 dpc_engine<EngineType>& engine_,
+                                 Type a,
+                                 Type b) {
     if (sycl::get_pointer_type(dst, engine_.get_queue().get_context()) ==
         sycl::usm::alloc::device) {
         throw domain_error(dal::detail::error_messages::unsupported_data_type());
@@ -90,7 +90,7 @@ template <typename Type,
           engine_method EngineType,
           typename T = Type,
           typename = std::enable_if_t<std::is_integral_v<T>>>
-void shuffle_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_) {
+void shuffle(Size count, Type* dst, dpc_engine<EngineType>& engine_) {
     if (sycl::get_pointer_type(dst, engine_.get_queue().get_context()) ==
         sycl::usm::alloc::device) {
         throw domain_error(dal::detail::error_messages::unsupported_data_type());
@@ -105,30 +105,30 @@ void shuffle_cpu(Size count, Type* dst, dpc_engine<EngineType>& engine_) {
 }
 
 template <typename Type, typename Size, engine_method EngineType>
-void uniform_gpu(sycl::queue& queue,
-                 Size count,
-                 Type* dst,
-                 dpc_engine<EngineType>& engine_,
-                 Type a,
-                 Type b,
-                 const event_vector& deps = {});
+void uniform(sycl::queue& queue,
+             Size count,
+             Type* dst,
+             dpc_engine<EngineType>& engine_,
+             Type a,
+             Type b,
+             const event_vector& deps = {});
 
 template <typename Type, typename Size, engine_method EngineType>
-void uniform_without_replacement_gpu(sycl::queue& queue,
-                                     Size count,
-                                     Type* dst,
-                                     Type* buffer,
-                                     dpc_engine<EngineType>& engine_,
-                                     Type a,
-                                     Type b,
-                                     const event_vector& deps = {});
+void uniform_without_replacement(sycl::queue& queue,
+                                 Size count,
+                                 Type* dst,
+                                 Type* buffer,
+                                 dpc_engine<EngineType>& engine_,
+                                 Type a,
+                                 Type b,
+                                 const event_vector& deps = {});
 
 template <typename Type, typename Size, engine_method EngineType>
-void shuffle_gpu(sycl::queue& queue,
-                 Size count,
-                 Type* dst,
-                 dpc_engine<EngineType>& engine_,
-                 const event_vector& deps = {});
+void shuffle(sycl::queue& queue,
+             Size count,
+             Type* dst,
+             dpc_engine<EngineType>& engine_,
+             const event_vector& deps = {});
 #endif
 
 }; // namespace oneapi::dal::backend::primitives