From 220040b0670affb7e52c4113bfd8e68376896fee Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 18 Jun 2024 17:11:11 +0100 Subject: [PATCH] [SYCL][Graph] Permit empty & barrier nodes in WGU In order to enable the minimum viable real life usecase for the Whole Graph Update feature. Allow graphs to contain empty nodes and barrier nodes during update. Depends on PR https://github.com/intel/llvm/pull/14212 --- sycl/source/detail/graph_impl.cpp | 27 ++-- .../Update/whole_update_barrier_node.cpp | 119 +++++++++++++++++ .../Graph/Update/whole_update_empty_node.cpp | 120 ++++++++++++++++++ .../Extensions/CommandGraph/Update.cpp | 35 +++++ 4 files changed, 292 insertions(+), 9 deletions(-) create mode 100644 sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp create mode 100644 sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 329eab2aaf832..b3ee01843c77f 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1236,18 +1236,22 @@ void exec_graph_impl::update( sycl::make_error_code(errc::invalid), "Node passed to update() is not part of the graph."); } - if (Node->MCGType != sycl::detail::CG::Kernel) { - throw sycl::exception(errc::invalid, "Cannot update non-kernel nodes"); - } - if (Node->MCommandGroup->getRequirements().size() == 0) { - continue; + if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel || + Node->MCGType == sycl::detail::CG::Barrier)) { + throw sycl::exception(errc::invalid, + "Cannot update node type. Node must be be of " + "kernel, empty, or barrier type."); } - NeedScheduledUpdate = true; - UpdateRequirements.insert(UpdateRequirements.end(), - Node->MCommandGroup->getRequirements().begin(), - Node->MCommandGroup->getRequirements().end()); + if (const auto &CG = Node->MCommandGroup; + CG->getRequirements().size() != 0) { + NeedScheduledUpdate = true; + + UpdateRequirements.insert(UpdateRequirements.end(), + Node->MCommandGroup->getRequirements().begin(), + Node->MCommandGroup->getRequirements().end()); + } } // Clean up any execution events which have finished so we don't pass them to @@ -1290,6 +1294,11 @@ void exec_graph_impl::update( } void exec_graph_impl::updateImpl(std::shared_ptr Node) { + // Kernel node update is the only command type supported in UR for update. + // Updating any other types of nodes, e.g. empty & barrier nodes is a no-op. + if (Node->MCGType != sycl::detail::CG::Kernel) { + return; + } auto ContextImpl = sycl::detail::getSyclObjImpl(MContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice()); diff --git a/sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp b/sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp new file mode 100644 index 0000000000000..86bc166726e1a --- /dev/null +++ b/sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp @@ -0,0 +1,119 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// REQUIRES: aspect-usm_shared_allocations + +// Tests that whole graph update works when a graph contains a barrier node. + +#include "../graph_common.hpp" + +// Queue submissions that can be recorded to a graph, with a barrier node +// separating initialization and computation kernel nodes +template +void RecordGraph(queue &Queue, size_t Size, T *Input1, T *Input2, T *Output) { + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input1[i] += i; + } + }); + }); + + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input2[i] += i; + } + }); + }); + + Queue.ext_oneapi_submit_barrier(); + + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Output[i] = Input1[i] * Input2[i]; + } + }); + }); +} + +int main() { + queue Queue{}; + + using T = int; + + // USM allocations for GraphA + T *InputA1 = malloc_shared(Size, Queue); + T *InputA2 = malloc_shared(Size, Queue); + T *OutputA = malloc_shared(Size, Queue); + + // Initialize USM allocations + T Pattern1 = 0xA; + T Pattern2 = 0x42; + T PatternZero = 0; + + Queue.fill(InputA1, Pattern1, Size); + Queue.fill(InputA2, Pattern2, Size); + Queue.fill(OutputA, PatternZero, Size); + Queue.wait(); + + // Define GraphA + exp_ext::command_graph GraphA{Queue}; + GraphA.begin_recording(Queue); + RecordGraph(Queue, Size, InputA1, InputA2, OutputA); + GraphA.end_recording(); + + // Finalize, run, and validate GraphA + auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{}); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputA[i], "OutputA")); + } + + // Create GraphB which will be used to update GraphA + exp_ext::command_graph GraphB{Queue}; + + // USM allocations for GraphB + T *InputB1 = malloc_shared(Size, Queue); + T *InputB2 = malloc_shared(Size, Queue); + T *OutputB = malloc_shared(Size, Queue); + + // Initialize GraphB allocations + Pattern1 = -42; + Pattern2 = 0xF; + + Queue.fill(InputB1, Pattern1, Size); + Queue.fill(InputB2, Pattern2, Size); + Queue.fill(OutputB, PatternZero, Size); + Queue.wait(); + + // Create GraphB + GraphB.begin_recording(Queue); + RecordGraph(Queue, Size, InputB1, InputB2, OutputB); + GraphB.end_recording(); + + // Update executable GraphA with GraphB, run, and validate + GraphExecA.update(GraphB); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputB[i], "OutputB")); + } + + free(InputA1, Queue); + free(InputA2, Queue); + free(OutputA, Queue); + + free(InputB1, Queue); + free(InputB2, Queue); + free(OutputB, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp b/sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp new file mode 100644 index 0000000000000..8816eb385936f --- /dev/null +++ b/sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp @@ -0,0 +1,120 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// REQUIRES: aspect-usm_shared_allocations + +// Tests that whole graph update works when a graph contain an empty node. + +#include "../graph_common.hpp" + +// Creates a graph with an empty node separating initialization and computation +// kernel nodes +template +void CreateGraph( + exp_ext::command_graph &Graph, + size_t Size, T *Input1, T *Input2, T *Output) { + Graph.add([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input1[i] += i; + } + }); + }); + + Graph.add([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input2[i] += i; + } + }); + }); + + auto EmptyNodeA = + Graph.add({exp_ext::property::node::depends_on_all_leaves()}); + + Graph.add( + [&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Output[i] = Input1[i] * Input2[i]; + } + }); + }, + {exp_ext::property::node::depends_on(EmptyNodeA)}); +} + +int main() { + queue Queue{}; + + using T = int; + + // USM allocations for GraphA + T *InputA1 = malloc_shared(Size, Queue); + T *InputA2 = malloc_shared(Size, Queue); + T *OutputA = malloc_shared(Size, Queue); + + // Initialize USM allocations + T Pattern1 = 0xA; + T Pattern2 = 0x42; + T PatternZero = 0; + + Queue.fill(InputA1, Pattern1, Size); + Queue.fill(InputA2, Pattern2, Size); + Queue.fill(OutputA, PatternZero, Size); + Queue.wait(); + + // Construct GraphA + exp_ext::command_graph GraphA{Queue}; + CreateGraph(GraphA, Size, InputA1, InputA2, OutputA); + + // Finalize, run, and validate GraphA + auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{}); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputA[i], "OutputA")); + } + + // Create GraphB which will be used to update GraphA + exp_ext::command_graph GraphB{Queue}; + + // USM allocations for GraphB + T *InputB1 = malloc_shared(Size, Queue); + T *InputB2 = malloc_shared(Size, Queue); + T *OutputB = malloc_shared(Size, Queue); + + // Initialize GraphB + Pattern1 = -42; + Pattern2 = 0xF; + + Queue.fill(InputB1, Pattern1, Size); + Queue.fill(InputB2, Pattern2, Size); + Queue.fill(OutputB, PatternZero, Size); + Queue.wait(); + + // Construct GraphB + CreateGraph(GraphB, Size, InputB1, InputB2, OutputB); + + // Update executable GraphA with GraphB, run, and validate + GraphExecA.update(GraphB); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputB[i], "OutputB")); + } + + free(InputA1, Queue); + free(InputA2, Queue); + free(OutputA, Queue); + + free(InputB1, Queue); + free(InputB2, Queue); + free(OutputB, Queue); + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index bb813cf211246..59182ed5b5226 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -109,6 +109,11 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { cgh.host_task([]() {}); })); + ASSERT_ANY_THROW(auto NodeBarrier = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynamicParam); + cgh.ext_oneapi_barrier(); + })); + auto NodeEmpty = Graph.add(); experimental::command_graph Subgraph(Queue.get_context(), Dev); @@ -375,3 +380,33 @@ TEST_F(WholeGraphUpdateTest, MissingUpdatableProperty) { auto GraphExec = Graph.finalize(); EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception); } + +TEST_F(WholeGraphUpdateTest, EmptyNode) { + // Test that updating a graph that has an empty node is not an error + auto NodeEmpty = Graph.add(); + auto UpdateNodeEmpty = UpdateGraph.add(); + + auto NodeKernel = Graph.add(EmptyKernel); + auto UpdateNodeKernel = UpdateGraph.add(EmptyKernel); + + auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); + GraphExec.update(UpdateGraph); +} + +TEST_F(WholeGraphUpdateTest, BarrierNode) { + // Test that updating a graph that has a barrier node is not an error + Graph.begin_recording(Queue); + auto NodeKernel = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Queue.ext_oneapi_submit_barrier({NodeKernel}); + Graph.end_recording(Queue); + + UpdateGraph.begin_recording(Queue); + auto UpdateNodeKernel = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Queue.ext_oneapi_submit_barrier({UpdateNodeKernel}); + UpdateGraph.end_recording(Queue); + + auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); + GraphExec.update(UpdateGraph); +}