Skip to content

Commit

Permalink
Cleanup some qvi-split code.
Browse files Browse the repository at this point in the history
Signed-off-by: Samuel K. Gutierrez <samuel@lanl.gov>
  • Loading branch information
samuelkgutierrez committed Jul 27, 2024
1 parent 4a47788 commit bf0df4e
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 65 deletions.
4 changes: 2 additions & 2 deletions src/qvi-scope.cc
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,10 @@ qvi_scope_split(
qvi_group_t *group = nullptr;
qv_scope_t *ichild = nullptr;
// Split the hardware resources based on the provided split parameters.
qvi_scope_split_coll_s splitcoll(
qvi_coll_hwsplit_s chwsplit(
parent, npieces, color, maybe_obj_type
);
rc = splitcoll.split(&colorp, &hwpool);
rc = chwsplit.split(&colorp, &hwpool);
if (rc != QV_SUCCESS) goto out;
// Split underlying group. Notice the use of colorp here.
rc = parent->group->split(
Expand Down
99 changes: 46 additions & 53 deletions src/qvi-split.cc
Original file line number Diff line number Diff line change
Expand Up @@ -429,25 +429,25 @@ qvi_hwsplit_s::split(void)
return rc;
}

qvi_scope_split_coll_s::qvi_scope_split_coll_s(
qv_scope_t *parent_a,
uint_t split_size_a,
int mycolor_a,
qv_hw_obj_type_t split_at_type_a
) : parent(parent_a)
, mycolor(mycolor_a)
qvi_coll_hwsplit_s::qvi_coll_hwsplit_s(
qv_scope_t *parent,
uint_t npieces,
int color,
qv_hw_obj_type_t split_at_type
) : m_parent(parent)
, m_color(color)
{
const qvi_group_t *const pgroup = parent->group;
if (pgroup->rank() == qvi_scope_split_coll_s::s_rootid) {
hwsplit = qvi_hwsplit_s(
parent, pgroup->size(), split_size_a, split_at_type_a
const qvi_group_t *const pgroup = m_parent->group;
if (pgroup->rank() == qvi_coll_hwsplit_s::s_rootid) {
m_hwsplit = qvi_hwsplit_s(
m_parent, pgroup->size(), npieces, split_at_type
);
}
}

template <typename TYPE>
int
qvi_scope_split_coll_s::scatter_values(
qvi_coll_hwsplit_s::scatter_values(
int root,
const std::vector<TYPE> &values,
TYPE *value
Expand All @@ -457,7 +457,7 @@ qvi_scope_split_coll_s::scatter_values(
int rc = QV_SUCCESS;
qvi_bbuff_t *rxbuff = nullptr;

qvi_group_t *const group = parent->group;
qvi_group_t *const group = m_parent->group;
std::vector<qvi_bbuff_t *> txbuffs(0);
if (root == group->rank()) {
const uint_t group_size = group->size();
Expand Down Expand Up @@ -491,12 +491,12 @@ qvi_scope_split_coll_s::scatter_values(

template <typename TYPE>
int
qvi_scope_split_coll_s::bcast_value(
qvi_coll_hwsplit_s::bcast_value(
int root,
TYPE *value
) {
static_assert(std::is_trivially_copyable<TYPE>::value, "");
qvi_group_t *const group = parent->group;
qvi_group_t *const group = m_parent->group;

std::vector<TYPE> values;
if (root == group->rank()) {
Expand All @@ -508,13 +508,13 @@ qvi_scope_split_coll_s::bcast_value(

template <typename TYPE>
int
qvi_scope_split_coll_s::gather_values(
qvi_coll_hwsplit_s::gather_values(
int root,
TYPE invalue,
std::vector<TYPE> &outvals
) {
static_assert(std::is_trivially_copyable<TYPE>::value, "");
qvi_group_t *const group = parent->group;
qvi_group_t *const group = m_parent->group;
const uint_t group_size = group->size();

qvi_bbuff_t *txbuff = nullptr;
Expand Down Expand Up @@ -557,12 +557,12 @@ qvi_scope_split_coll_s::gather_values(
}

int
qvi_scope_split_coll_s::gather_hwpools(
qvi_coll_hwsplit_s::gather_hwpools(
int root,
qvi_hwpool_s *txpool,
std::vector<qvi_hwpool_s *> &rxpools
) {
qvi_group_t *const group = parent->group;
qvi_group_t *const group = m_parent->group;
const uint_t group_size = group->size();
// Pack the hardware pool into a buffer.
qvi_bbuff_t txbuff;
Expand Down Expand Up @@ -601,33 +601,33 @@ qvi_scope_split_coll_s::gather_hwpools(
}

int
qvi_scope_split_coll_s::gather(void)
qvi_coll_hwsplit_s::gather(void)
{
int rc = gather_values(
s_rootid, qvi_task_t::mytid(), hwsplit.m_taskids
s_rootid, qvi_task_t::mytid(), m_hwsplit.m_taskids
);
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
// Note that the result hwpools are copies, so we can modify them freely.
rc = gather_hwpools(
s_rootid, parent->hwpool, hwsplit.m_hwpools
s_rootid, m_parent->hwpool, m_hwsplit.m_hwpools
);
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;

rc = gather_values(
s_rootid, mycolor, hwsplit.m_colors
s_rootid, m_color, m_hwsplit.m_colors
);
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;

const int myid = parent->group->rank();
const uint_t group_size = parent->group->size();
if (myid == qvi_scope_split_coll_s::s_rootid) {
hwsplit.m_affinities.resize(group_size);
const int myid = m_parent->group->rank();
const uint_t group_size = m_parent->group->size();
if (myid == qvi_coll_hwsplit_s::s_rootid) {
m_hwsplit.m_affinities.resize(group_size);
for (uint_t tid = 0; tid < group_size; ++tid) {
hwloc_cpuset_t cpuset = nullptr;
rc = parent->group->task()->bind_top(&cpuset);
rc = m_parent->group->task()->bind_top(&cpuset);
if (qvi_unlikely(rc != QV_SUCCESS)) break;
//
rc = hwsplit.m_affinities[tid].set(cpuset);
rc = m_hwsplit.m_affinities[tid].set(cpuset);
// Clean up.
qvi_hwloc_bitmap_delete(&cpuset);
if (qvi_unlikely(rc != QV_SUCCESS)) break;
Expand All @@ -637,7 +637,7 @@ qvi_scope_split_coll_s::gather(void)
}

int
qvi_scope_split_coll_s::scatter_hwpools(
qvi_coll_hwsplit_s::scatter_hwpools(
int root,
const std::vector<qvi_hwpool_s *> &pools,
qvi_hwpool_s **pool
Expand All @@ -646,7 +646,7 @@ qvi_scope_split_coll_s::scatter_hwpools(
std::vector<qvi_bbuff_t *> txbuffs(0);
qvi_bbuff_t *rxbuff = nullptr;

qvi_group_t *const group = parent->group;
qvi_group_t *const group = m_parent->group;

if (root == group->rank()) {
const uint_t group_size = group->size();
Expand Down Expand Up @@ -678,28 +678,26 @@ qvi_scope_split_coll_s::scatter_hwpools(
}

int
qvi_scope_split_coll_s::scatter(
qvi_coll_hwsplit_s::scatter(
int *colorp,
qvi_hwpool_s **result
) {
const int rc = scatter_values(s_rootid, hwsplit.m_colors, colorp);
const int rc = scatter_values(s_rootid, m_hwsplit.m_colors, colorp);
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
return scatter_hwpools(s_rootid, hwsplit.m_hwpools, result);
return scatter_hwpools(s_rootid, m_hwsplit.m_hwpools, result);
}

int
qvi_scope_split_coll_s::barrier(void)
qvi_coll_hwsplit_s::barrier(void)
{
return parent->group->barrier();
return m_parent->group->barrier();
}

int
qvi_scope_split_coll_s::split(
qvi_coll_hwsplit_s::split(
int *colorp,
qvi_hwpool_s **result
) {
int rc2 = QV_SUCCESS;
const int myid = parent->group->rank();
// First consolidate the provided information, as this is coming from a
// SPMD-like context (e.g., splitting a resource shared by MPI processes).
// In most cases it is easiest to have a single task calculate the split
Expand All @@ -708,28 +706,23 @@ qvi_scope_split_coll_s::split(
// whose id is equal to qvi_global_split_t::rootid after gather has
// completed.
int rc = gather();
if (rc != QV_SUCCESS) goto out;
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
// The root does this calculation.
if (myid == s_rootid) {
rc2 = hwsplit.split();
int rc2 = QV_SUCCESS;
if (m_parent->group->rank() == s_rootid) {
rc2 = m_hwsplit.split();
}
// Wait for the split information. Explicitly barrier here in case the
// underlying broadcast implementation polls heavily for completion.
rc = barrier();
if (rc != QV_SUCCESS) goto out;
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
// To avoid hangs in split error paths, share the split rc with everyone.
rc = bcast_value(s_rootid, &rc2);
if (rc != QV_SUCCESS) goto out;
// If the split failed, return the error to all callers.
if (rc2 != QV_SUCCESS) {
rc = rc2;
goto out;
}
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
// If the split failed, return the error to all participants.
if (qvi_unlikely(rc2 != QV_SUCCESS)) return rc2;
// Scatter the results.
rc = scatter(colorp, result);
if (rc != QV_SUCCESS) goto out;
out:
return rc;
return scatter(colorp, result);
}

/*
Expand Down
28 changes: 18 additions & 10 deletions src/qvi-split.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,29 +153,37 @@ struct qvi_hwsplit_s {
* split operations requiring aggregated resource knowledge AND coordination
* between tasks in the parent scope to perform a split.
*/
struct qvi_scope_split_coll_s {
struct qvi_coll_hwsplit_s {
/**
* The root task ID used for collective operations.
* We use 0 as the root because 0 will always exist.
*/
static constexpr int s_rootid = 0;
/** Points to the parent scope that we are splitting. */
qv_scope_t *parent = nullptr;
qv_scope_t *m_parent = nullptr;
/** My color. */
int mycolor = 0;
int m_color = 0;
/**
* Stores group-global hardware split information brought together by
* collective operations across the members in the parent scope.
*/
qvi_hwsplit_s hwsplit;
qvi_hwsplit_s m_hwsplit;
/** Constructor. */
qvi_scope_split_coll_s(void) = delete;
qvi_coll_hwsplit_s(void) = delete;
/** Constructor. */
qvi_scope_split_coll_s(
qv_scope_t *parent_a,
uint_t split_size_a,
int mycolor_a,
qv_hw_obj_type_t split_at_type_a
/**
* Hardware resources will be split based on the provided split parameters:
* - npieces: The number of splits requested.
* - color: Either user-supplied (explicitly set) or a value that requests
* us to do the coloring for the callers.
* maybe_obj_type: Potentially the object type that we are splitting at. This
* value influences how the splitting algorithms perform their mapping.
*/
qvi_coll_hwsplit_s(
qv_scope_t *parent,
uint_t npieces,
int color,
qv_hw_obj_type_t split_at_type
);
/** */
template <typename TYPE>
Expand Down

0 comments on commit bf0df4e

Please sign in to comment.