Skip to content

Commit

Permalink
[SYCL][Graph] Fixes enqueue barrier slowdown (#11933)
Browse files Browse the repository at this point in the history
The implementation of ext_oneapi_submit_barrier involved exponential
slowdown due to unnecessary extra dependencies to barrier nodes. This PR
solves this issue by:
1) improving the function that searches for graph leaves (exit nodes) 
2) removing unnecessary dependencies to previous barriers when adding
new nodes.

Addresses Issue: #11915
  • Loading branch information
mfrancepillois authored Nov 27, 2023
1 parent fdfaadb commit 079fc97
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 9 deletions.
8 changes: 3 additions & 5 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,15 +416,13 @@ void graph_impl::makeEdge(std::shared_ptr<node_impl> Src,

std::vector<sycl::detail::EventImplPtr> graph_impl::getExitNodesEvents() {
std::vector<sycl::detail::EventImplPtr> Events;
auto EnqueueExitNodesEvents = [&](std::shared_ptr<node_impl> &Node,
std::deque<std::shared_ptr<node_impl>> &) {

for (auto Node : MNodeStorage) {
if (Node->MSuccessors.empty()) {
Events.push_back(getEventForNode(Node));
}
return false;
};
}

searchDepthFirst(EnqueueExitNodesEvents);
return Events;
}

Expand Down
20 changes: 19 additions & 1 deletion sycl/source/detail/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,24 @@ class graph_impl {
/// @return vector of events associated to exit nodes.
std::vector<sycl::detail::EventImplPtr> getExitNodesEvents();

/// Removes all Barrier nodes from the list of extra dependencies
/// MExtraDependencies.
/// @return vector of events associated to previous barrier nodes.
std::vector<sycl::detail::EventImplPtr>
removeBarriersFromExtraDependencies() {
std::vector<sycl::detail::EventImplPtr> Events;
for (auto It = MExtraDependencies.begin();
It != MExtraDependencies.end();) {
if ((*It)->MCGType == sycl::detail::CG::Barrier) {
Events.push_back(getEventForNode(*It));
It = MExtraDependencies.erase(It);
} else {
++It;
}
}
return Events;
}

private:
/// Iterate over the graph depth-first and run \p NodeFunc on each node.
/// @param NodeFunc A function which receives as input a node in the graph to
Expand Down Expand Up @@ -861,7 +879,7 @@ class graph_impl {
/// added to this graph.
/// This list is mainly used by barrier nodes which must be considered
/// as predecessors for all nodes subsequently added to the graph.
std::vector<std::shared_ptr<node_impl>> MExtraDependencies;
std::list<std::shared_ptr<node_impl>> MExtraDependencies;
};

/// Class representing the implementation of command_graph<executable>.
Expand Down
8 changes: 8 additions & 0 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,14 @@ event handler::finalize() {
// nodes/events of the graph
if (MEventsWaitWithBarrier.size() == 0) {
MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
// Graph-wide barriers take precedence over previous one.
// We therefore remove the previous ones from ExtraDependencies list.
// The current barrier is then added to this list in the graph_impl.
std::vector<detail::EventImplPtr> EventsBarriers =
GraphImpl->removeBarriersFromExtraDependencies();
MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
std::begin(EventsBarriers),
std::end(EventsBarriers));
}
CGData.MEvents.insert(std::end(CGData.MEvents),
std::begin(MEventsWaitWithBarrier),
Expand Down
6 changes: 3 additions & 3 deletions sycl/unittests/Extensions/CommandGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1458,7 +1458,7 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) {
// (B2)
// /|\
// / | \
// (6) (7) (8) (those nodes also have B1 as a predecessor)
// (6) (7) (8)
ASSERT_EQ(GraphImpl->MRoots.size(), 3lu);
for (auto Root : GraphImpl->MRoots) {
auto Node = Root.lock();
Expand All @@ -1468,7 +1468,7 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) {
ASSERT_EQ(GraphImpl->getEventForNode(SuccNode),
sycl::detail::getSyclObjImpl(Barrier1));
ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu);
ASSERT_EQ(SuccNode->MSuccessors.size(), 6lu);
ASSERT_EQ(SuccNode->MSuccessors.size(), 3lu);
for (auto Succ1 : SuccNode->MSuccessors) {
auto SuccBarrier1 = Succ1.lock();
if (SuccBarrier1->MCGType == sycl::detail::CG::Barrier) {
Expand All @@ -1479,7 +1479,7 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) {
for (auto Succ2 : SuccBarrier1->MSuccessors) {
auto SuccBarrier2 = Succ2.lock();
// Nodes 6, 7, 8
ASSERT_EQ(SuccBarrier2->MPredecessors.size(), 2lu);
ASSERT_EQ(SuccBarrier2->MPredecessors.size(), 1lu);
ASSERT_EQ(SuccBarrier2->MSuccessors.size(), 0lu);
}
} else {
Expand Down

0 comments on commit 079fc97

Please sign in to comment.