From bf0df4e2ab46143cc5fb6eac4f987ac1bc632029 Mon Sep 17 00:00:00 2001 From: "Samuel K. Gutierrez" Date: Fri, 26 Jul 2024 20:14:45 -0600 Subject: [PATCH] Cleanup some qvi-split code. Signed-off-by: Samuel K. Gutierrez --- src/qvi-scope.cc | 4 +- src/qvi-split.cc | 99 ++++++++++++++++++++++-------------------------- src/qvi-split.h | 28 +++++++++----- 3 files changed, 66 insertions(+), 65 deletions(-) diff --git a/src/qvi-scope.cc b/src/qvi-scope.cc index 3621081..72ca6dd 100644 --- a/src/qvi-scope.cc +++ b/src/qvi-scope.cc @@ -258,10 +258,10 @@ qvi_scope_split( qvi_group_t *group = nullptr; qv_scope_t *ichild = nullptr; // Split the hardware resources based on the provided split parameters. - qvi_scope_split_coll_s splitcoll( + qvi_coll_hwsplit_s chwsplit( parent, npieces, color, maybe_obj_type ); - rc = splitcoll.split(&colorp, &hwpool); + rc = chwsplit.split(&colorp, &hwpool); if (rc != QV_SUCCESS) goto out; // Split underlying group. Notice the use of colorp here. rc = parent->group->split( diff --git a/src/qvi-split.cc b/src/qvi-split.cc index 3acc2c2..5fe3332 100644 --- a/src/qvi-split.cc +++ b/src/qvi-split.cc @@ -429,25 +429,25 @@ qvi_hwsplit_s::split(void) return rc; } -qvi_scope_split_coll_s::qvi_scope_split_coll_s( - qv_scope_t *parent_a, - uint_t split_size_a, - int mycolor_a, - qv_hw_obj_type_t split_at_type_a -) : parent(parent_a) - , mycolor(mycolor_a) +qvi_coll_hwsplit_s::qvi_coll_hwsplit_s( + qv_scope_t *parent, + uint_t npieces, + int color, + qv_hw_obj_type_t split_at_type +) : m_parent(parent) + , m_color(color) { - const qvi_group_t *const pgroup = parent->group; - if (pgroup->rank() == qvi_scope_split_coll_s::s_rootid) { - hwsplit = qvi_hwsplit_s( - parent, pgroup->size(), split_size_a, split_at_type_a + const qvi_group_t *const pgroup = m_parent->group; + if (pgroup->rank() == qvi_coll_hwsplit_s::s_rootid) { + m_hwsplit = qvi_hwsplit_s( + m_parent, pgroup->size(), npieces, split_at_type ); } } template int -qvi_scope_split_coll_s::scatter_values( +qvi_coll_hwsplit_s::scatter_values( int root, const std::vector &values, TYPE *value @@ -457,7 +457,7 @@ qvi_scope_split_coll_s::scatter_values( int rc = QV_SUCCESS; qvi_bbuff_t *rxbuff = nullptr; - qvi_group_t *const group = parent->group; + qvi_group_t *const group = m_parent->group; std::vector txbuffs(0); if (root == group->rank()) { const uint_t group_size = group->size(); @@ -491,12 +491,12 @@ qvi_scope_split_coll_s::scatter_values( template int -qvi_scope_split_coll_s::bcast_value( +qvi_coll_hwsplit_s::bcast_value( int root, TYPE *value ) { static_assert(std::is_trivially_copyable::value, ""); - qvi_group_t *const group = parent->group; + qvi_group_t *const group = m_parent->group; std::vector values; if (root == group->rank()) { @@ -508,13 +508,13 @@ qvi_scope_split_coll_s::bcast_value( template int -qvi_scope_split_coll_s::gather_values( +qvi_coll_hwsplit_s::gather_values( int root, TYPE invalue, std::vector &outvals ) { static_assert(std::is_trivially_copyable::value, ""); - qvi_group_t *const group = parent->group; + qvi_group_t *const group = m_parent->group; const uint_t group_size = group->size(); qvi_bbuff_t *txbuff = nullptr; @@ -557,12 +557,12 @@ qvi_scope_split_coll_s::gather_values( } int -qvi_scope_split_coll_s::gather_hwpools( +qvi_coll_hwsplit_s::gather_hwpools( int root, qvi_hwpool_s *txpool, std::vector &rxpools ) { - qvi_group_t *const group = parent->group; + qvi_group_t *const group = m_parent->group; const uint_t group_size = group->size(); // Pack the hardware pool into a buffer. qvi_bbuff_t txbuff; @@ -601,33 +601,33 @@ qvi_scope_split_coll_s::gather_hwpools( } int -qvi_scope_split_coll_s::gather(void) +qvi_coll_hwsplit_s::gather(void) { int rc = gather_values( - s_rootid, qvi_task_t::mytid(), hwsplit.m_taskids + s_rootid, qvi_task_t::mytid(), m_hwsplit.m_taskids ); if (qvi_unlikely(rc != QV_SUCCESS)) return rc; // Note that the result hwpools are copies, so we can modify them freely. rc = gather_hwpools( - s_rootid, parent->hwpool, hwsplit.m_hwpools + s_rootid, m_parent->hwpool, m_hwsplit.m_hwpools ); if (qvi_unlikely(rc != QV_SUCCESS)) return rc; rc = gather_values( - s_rootid, mycolor, hwsplit.m_colors + s_rootid, m_color, m_hwsplit.m_colors ); if (qvi_unlikely(rc != QV_SUCCESS)) return rc; - const int myid = parent->group->rank(); - const uint_t group_size = parent->group->size(); - if (myid == qvi_scope_split_coll_s::s_rootid) { - hwsplit.m_affinities.resize(group_size); + const int myid = m_parent->group->rank(); + const uint_t group_size = m_parent->group->size(); + if (myid == qvi_coll_hwsplit_s::s_rootid) { + m_hwsplit.m_affinities.resize(group_size); for (uint_t tid = 0; tid < group_size; ++tid) { hwloc_cpuset_t cpuset = nullptr; - rc = parent->group->task()->bind_top(&cpuset); + rc = m_parent->group->task()->bind_top(&cpuset); if (qvi_unlikely(rc != QV_SUCCESS)) break; // - rc = hwsplit.m_affinities[tid].set(cpuset); + rc = m_hwsplit.m_affinities[tid].set(cpuset); // Clean up. qvi_hwloc_bitmap_delete(&cpuset); if (qvi_unlikely(rc != QV_SUCCESS)) break; @@ -637,7 +637,7 @@ qvi_scope_split_coll_s::gather(void) } int -qvi_scope_split_coll_s::scatter_hwpools( +qvi_coll_hwsplit_s::scatter_hwpools( int root, const std::vector &pools, qvi_hwpool_s **pool @@ -646,7 +646,7 @@ qvi_scope_split_coll_s::scatter_hwpools( std::vector txbuffs(0); qvi_bbuff_t *rxbuff = nullptr; - qvi_group_t *const group = parent->group; + qvi_group_t *const group = m_parent->group; if (root == group->rank()) { const uint_t group_size = group->size(); @@ -678,28 +678,26 @@ qvi_scope_split_coll_s::scatter_hwpools( } int -qvi_scope_split_coll_s::scatter( +qvi_coll_hwsplit_s::scatter( int *colorp, qvi_hwpool_s **result ) { - const int rc = scatter_values(s_rootid, hwsplit.m_colors, colorp); + const int rc = scatter_values(s_rootid, m_hwsplit.m_colors, colorp); if (qvi_unlikely(rc != QV_SUCCESS)) return rc; - return scatter_hwpools(s_rootid, hwsplit.m_hwpools, result); + return scatter_hwpools(s_rootid, m_hwsplit.m_hwpools, result); } int -qvi_scope_split_coll_s::barrier(void) +qvi_coll_hwsplit_s::barrier(void) { - return parent->group->barrier(); + return m_parent->group->barrier(); } int -qvi_scope_split_coll_s::split( +qvi_coll_hwsplit_s::split( int *colorp, qvi_hwpool_s **result ) { - int rc2 = QV_SUCCESS; - const int myid = parent->group->rank(); // First consolidate the provided information, as this is coming from a // SPMD-like context (e.g., splitting a resource shared by MPI processes). // In most cases it is easiest to have a single task calculate the split @@ -708,28 +706,23 @@ qvi_scope_split_coll_s::split( // whose id is equal to qvi_global_split_t::rootid after gather has // completed. int rc = gather(); - if (rc != QV_SUCCESS) goto out; + if (qvi_unlikely(rc != QV_SUCCESS)) return rc; // The root does this calculation. - if (myid == s_rootid) { - rc2 = hwsplit.split(); + int rc2 = QV_SUCCESS; + if (m_parent->group->rank() == s_rootid) { + rc2 = m_hwsplit.split(); } // Wait for the split information. Explicitly barrier here in case the // underlying broadcast implementation polls heavily for completion. rc = barrier(); - if (rc != QV_SUCCESS) goto out; + if (qvi_unlikely(rc != QV_SUCCESS)) return rc; // To avoid hangs in split error paths, share the split rc with everyone. rc = bcast_value(s_rootid, &rc2); - if (rc != QV_SUCCESS) goto out; - // If the split failed, return the error to all callers. - if (rc2 != QV_SUCCESS) { - rc = rc2; - goto out; - } + if (qvi_unlikely(rc != QV_SUCCESS)) return rc; + // If the split failed, return the error to all participants. + if (qvi_unlikely(rc2 != QV_SUCCESS)) return rc2; // Scatter the results. - rc = scatter(colorp, result); - if (rc != QV_SUCCESS) goto out; -out: - return rc; + return scatter(colorp, result); } /* diff --git a/src/qvi-split.h b/src/qvi-split.h index 356cb77..5f8cc1f 100644 --- a/src/qvi-split.h +++ b/src/qvi-split.h @@ -153,29 +153,37 @@ struct qvi_hwsplit_s { * split operations requiring aggregated resource knowledge AND coordination * between tasks in the parent scope to perform a split. */ -struct qvi_scope_split_coll_s { +struct qvi_coll_hwsplit_s { /** * The root task ID used for collective operations. * We use 0 as the root because 0 will always exist. */ static constexpr int s_rootid = 0; /** Points to the parent scope that we are splitting. */ - qv_scope_t *parent = nullptr; + qv_scope_t *m_parent = nullptr; /** My color. */ - int mycolor = 0; + int m_color = 0; /** * Stores group-global hardware split information brought together by * collective operations across the members in the parent scope. */ - qvi_hwsplit_s hwsplit; + qvi_hwsplit_s m_hwsplit; /** Constructor. */ - qvi_scope_split_coll_s(void) = delete; + qvi_coll_hwsplit_s(void) = delete; /** Constructor. */ - qvi_scope_split_coll_s( - qv_scope_t *parent_a, - uint_t split_size_a, - int mycolor_a, - qv_hw_obj_type_t split_at_type_a + /** + * Hardware resources will be split based on the provided split parameters: + * - npieces: The number of splits requested. + * - color: Either user-supplied (explicitly set) or a value that requests + * us to do the coloring for the callers. + * maybe_obj_type: Potentially the object type that we are splitting at. This + * value influences how the splitting algorithms perform their mapping. + */ + qvi_coll_hwsplit_s( + qv_scope_t *parent, + uint_t npieces, + int color, + qv_hw_obj_type_t split_at_type ); /** */ template