From 49de96e5124170ce96824fefb88dc0f62e529775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 2 Apr 2024 17:35:28 +0100 Subject: [PATCH] Address review comments --- sycl/doc/design/CommandGraph.md | 19 ++- sycl/source/detail/graph_impl.cpp | 10 +- .../executable_graph_update_ordering.cpp | 16 -- ...fer.cpp => whole_update_double_buffer.cpp} | 6 +- ...h_update.cpp => whole_update_subgraph.cpp} | 6 +- .../Graph/Explicit/whole_update_usm.cpp | 10 ++ .../whole_update_double_buffer.cpp | 10 -- .../Graph/Inputs/whole_update_subgraph.cpp | 73 +++++++++ .../{Update => Inputs}/whole_update_usm.cpp | 10 -- sycl/test-e2e/Graph/README.md | 0 .../executable_graph_update_ordering.cpp | 16 -- .../whole_update_double_buffer.cpp | 10 ++ ...h_update.cpp => whole_update_subgraph.cpp} | 6 +- ...double_buffer.cpp => whole_update_usm.cpp} | 6 +- .../Graph/Update/whole_update_delay.cpp | 138 ------------------ .../Update/whole_update_dynamic_param.cpp | 15 +- .../Extensions/CommandGraph/Common.hpp | 1 - .../Extensions/CommandGraph/Update.cpp | 104 ++++++------- 18 files changed, 185 insertions(+), 271 deletions(-) delete mode 100644 sycl/test-e2e/Graph/Explicit/executable_graph_update_ordering.cpp rename sycl/test-e2e/Graph/Explicit/{double_buffer.cpp => whole_update_double_buffer.cpp} (78%) rename sycl/test-e2e/Graph/Explicit/{executable_graph_update.cpp => whole_update_subgraph.cpp} (80%) create mode 100644 sycl/test-e2e/Graph/Explicit/whole_update_usm.cpp rename sycl/test-e2e/Graph/{Update => Inputs}/whole_update_double_buffer.cpp (84%) create mode 100644 sycl/test-e2e/Graph/Inputs/whole_update_subgraph.cpp rename sycl/test-e2e/Graph/{Update => Inputs}/whole_update_usm.cpp (84%) create mode 100644 sycl/test-e2e/Graph/README.md delete mode 100644 sycl/test-e2e/Graph/RecordReplay/executable_graph_update_ordering.cpp create mode 100644 sycl/test-e2e/Graph/RecordReplay/whole_update_double_buffer.cpp rename sycl/test-e2e/Graph/RecordReplay/{executable_graph_update.cpp => whole_update_subgraph.cpp} (80%) rename sycl/test-e2e/Graph/RecordReplay/{double_buffer.cpp => whole_update_usm.cpp} (79%) delete mode 100644 sycl/test-e2e/Graph/Update/whole_update_delay.cpp diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index c1e570d13ab21..87c4a57bb5c4f 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -234,7 +234,9 @@ yet been implemented. ### Design Challenges -Graph update faces significant design challenges in SYCL: +#### Explicit Update + +Explicit updates of individual nodes faces significant design challenges in SYCL: * Lambda capture order is explicitly undefined in C++, so the user cannot reason about the indices of arguments captured by kernel lambdas. @@ -256,9 +258,18 @@ can be used: extension](../extensions/proposed/sycl_ext_oneapi_free_function_kernels.asciidoc) * OpenCL interop kernels created from SPIR-V source at runtime. -A possible future workaround lambda capture issues could be "Whole-Graph Update" -where if we can guarantee that lambda capture order is the same across two -different recordings we can then match parameter order when updating. +A workaround for the lambda capture issues is the "Whole-Graph Update" feature. +Since the lambda capture order is the same across two different recordings, we +can match the parameter order when updating. + +#### Whole-Graph Update + +The current implementation of the whole-graph update feature relies on the +assumption that both graphs should have a similar topology. Currently, the +implementation only checks that both graphs have an identical number of nodes +and that each node contains the same number of edges. A possible design change +could be to add more checks to the implementation. This would give the user +better error messages but with possible performance penalties. ### Scheduler Integration diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 13fa608a5d890..237d63f7ec463 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1161,12 +1161,12 @@ void exec_graph_impl::update(std::shared_ptr GraphImpl) { if (MDevice != GraphImpl->getDevice()) { throw sycl::exception( sycl::make_error_code(errc::invalid), - "The graphs must have been created with matching devices."); + "Cannot update using a graph created with a different device."); } if (MContext != GraphImpl->getContext()) { throw sycl::exception( sycl::make_error_code(errc::invalid), - "The graphs must have been created with matching contexts."); + "Cannot update using a graph created with a different context."); } if (MNodeStorage.size() != GraphImpl->MNodeStorage.size()) { @@ -1183,6 +1183,12 @@ void exec_graph_impl::update(std::shared_ptr GraphImpl) { "Mismatch found in the number of edges. The " "graphs must have a matching topology."); } + + if (MNodeStorage[i]->MCGType != GraphImpl->MNodeStorage[i]->MCGType) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Mismatch found in the type of nodes. Each pair " + "of nodes being updated must have the same type"); + } } } diff --git a/sycl/test-e2e/Graph/Explicit/executable_graph_update_ordering.cpp b/sycl/test-e2e/Graph/Explicit/executable_graph_update_ordering.cpp deleted file mode 100644 index 0668d94c35077..0000000000000 --- a/sycl/test-e2e/Graph/Explicit/executable_graph_update_ordering.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG -// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Extra run to check for immediate-command-list in Level Zero -// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} - -// REQUIRES: aspect-usm_shared_allocations - -// Skip as executable graph update and host tasks both aren't -// implemented. -// REQUIRES: NOT_YET_IMPLEMENTED - -#define GRAPH_E2E_EXPLICIT - -#include "../Update/whole_update_delay.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/double_buffer.cpp b/sycl/test-e2e/Graph/Explicit/whole_update_double_buffer.cpp similarity index 78% rename from sycl/test-e2e/Graph/Explicit/double_buffer.cpp rename to sycl/test-e2e/Graph/Explicit/whole_update_double_buffer.cpp index a9db218bc9271..5b5ab7626b44c 100644 --- a/sycl/test-e2e/Graph/Explicit/double_buffer.cpp +++ b/sycl/test-e2e/Graph/Explicit/whole_update_double_buffer.cpp @@ -4,11 +4,7 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// - -// Skip as executable graph update isn't implemented yet -// REQUIRES: NOT_YET_IMPLEMENTED #define GRAPH_E2E_EXPLICIT -#include "../Update/whole_update_double_buffer.cpp" +#include "../Inputs/whole_update_double_buffer.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/executable_graph_update.cpp b/sycl/test-e2e/Graph/Explicit/whole_update_subgraph.cpp similarity index 80% rename from sycl/test-e2e/Graph/Explicit/executable_graph_update.cpp rename to sycl/test-e2e/Graph/Explicit/whole_update_subgraph.cpp index ca4d30781c5de..0ea0ccc34fc60 100644 --- a/sycl/test-e2e/Graph/Explicit/executable_graph_update.cpp +++ b/sycl/test-e2e/Graph/Explicit/whole_update_subgraph.cpp @@ -4,11 +4,9 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// Skip as executable graph update not implemented yet -// REQUIRES: NOT_YET_IMPLEMENTED +// REQUIRES: aspect-usm_shared_allocations #define GRAPH_E2E_EXPLICIT -#include "../Update/whole_update_usm.cpp" +#include "../Inputs/whole_update_subgraph.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/whole_update_usm.cpp b/sycl/test-e2e/Graph/Explicit/whole_update_usm.cpp new file mode 100644 index 0000000000000..50c6bdbe86df6 --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/whole_update_usm.cpp @@ -0,0 +1,10 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/whole_update_usm.cpp" diff --git a/sycl/test-e2e/Graph/Update/whole_update_double_buffer.cpp b/sycl/test-e2e/Graph/Inputs/whole_update_double_buffer.cpp similarity index 84% rename from sycl/test-e2e/Graph/Update/whole_update_double_buffer.cpp rename to sycl/test-e2e/Graph/Inputs/whole_update_double_buffer.cpp index d0f075d2225db..10c2e45b6f5b5 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_double_buffer.cpp +++ b/sycl/test-e2e/Graph/Inputs/whole_update_double_buffer.cpp @@ -1,16 +1,6 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG -// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Extra run to check for immediate-command-list in Level Zero -// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// UNSUPPORTED: opencl, level_zero - // Tests executable graph update by creating a double buffering scenario, where // a single graph is repeatedly executed then updated to swap between two sets // of buffers. -#define GRAPH_E2E_EXPLICIT #include "../graph_common.hpp" diff --git a/sycl/test-e2e/Graph/Inputs/whole_update_subgraph.cpp b/sycl/test-e2e/Graph/Inputs/whole_update_subgraph.cpp new file mode 100644 index 0000000000000..00c3b455b15c5 --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/whole_update_subgraph.cpp @@ -0,0 +1,73 @@ +// Tests that whole graph update works when using sub-graphs. + +#include "../graph_common.hpp" + +template +void constructGraphs( + queue Queue, exp_ext::command_graph Graph, + exp_ext::command_graph SubGraph, + T *Data) { + + add_node(SubGraph, Queue, [&](handler &CGH) { + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { Data[Id] += 2; }); + }); + + exp_ext::command_graph SubGraphExec = SubGraph.finalize(); + + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { Data[Id] += 1; }); + }); + + auto NodeB = add_node(Graph, Queue, [&](handler &CGH) { + depends_on_helper(CGH, NodeA); + CGH.ext_oneapi_graph(SubGraphExec); + }); +} + +int main() { + queue Queue{}; + + using T = int; + + std::vector DataHost(Size, 1); + std::vector DataHostUpdate(Size, 1); + T *DataDevice = malloc_device(Size, Queue); + T *DataDeviceUpdate = malloc_device(Size, Queue); + Queue.copy(DataHost.data(), DataDevice, Size); + Queue.copy(DataHost.data(), DataDeviceUpdate, Size); + + exp_ext::command_graph SubGraphA{Queue.get_context(), Queue.get_device()}; + exp_ext::command_graph SubGraphB{Queue.get_context(), Queue.get_device()}; + + exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()}; + exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()}; + + constructGraphs(Queue, GraphA, SubGraphA, DataDevice); + auto GraphExec = GraphA.finalize(exp_ext::property::graph::updatable{}); + Queue.ext_oneapi_graph(GraphExec).wait(); + + constructGraphs(Queue, GraphB, SubGraphB, DataDeviceUpdate); + + bool GotException = false; + try { + GraphExec.update(GraphB); + } catch (sycl::exception &e) { + // TODO The subgraph update feature is not implemented yet. For now this + // is the expected behaviour. + return 0; + } + assert(!GotException); + + Queue.ext_oneapi_graph(GraphExec).wait(); + + Queue.copy(DataDevice, DataHost.data(), Size); + Queue.copy(DataDeviceUpdate, DataHostUpdate.data(), Size); + Queue.wait_and_throw(); + + for (size_t i = 0; i < Size; i++) { + assert(check_value(i, 4, DataHost[i], "DataHost")); + assert(check_value(i, 4, DataHostUpdate[i], "DataHostUpdate")); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/whole_update_usm.cpp b/sycl/test-e2e/Graph/Inputs/whole_update_usm.cpp similarity index 84% rename from sycl/test-e2e/Graph/Update/whole_update_usm.cpp rename to sycl/test-e2e/Graph/Inputs/whole_update_usm.cpp index 4aaed6364dd6f..91db22c3db2c2 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_usm.cpp +++ b/sycl/test-e2e/Graph/Inputs/whole_update_usm.cpp @@ -1,15 +1,5 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG -// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Extra run to check for immediate-command-list in Level Zero -// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// UNSUPPORTED: opencl, level_zero - // Tests executable graph update by creating two graphs with USM ptrs and // attempting to update one from the other. -#define GRAPH_E2E_EXPLICIT #include "../graph_common.hpp" diff --git a/sycl/test-e2e/Graph/README.md b/sycl/test-e2e/Graph/README.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sycl/test-e2e/Graph/RecordReplay/executable_graph_update_ordering.cpp b/sycl/test-e2e/Graph/RecordReplay/executable_graph_update_ordering.cpp deleted file mode 100644 index aaf5841587b5a..0000000000000 --- a/sycl/test-e2e/Graph/RecordReplay/executable_graph_update_ordering.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG -// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Extra run to check for immediate-command-list in Level Zero -// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} - -// REQUIRES: aspect-usm_shared_allocations - -// Skip as executable graph update and host tasks both aren't -// implemented. -// REQUIRES: NOT_YET_IMPLEMENTED - -#define GRAPH_E2E_RECORD_REPLAY - -#include "../Update/whole_update_delay.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/whole_update_double_buffer.cpp b/sycl/test-e2e/Graph/RecordReplay/whole_update_double_buffer.cpp new file mode 100644 index 0000000000000..84e6c96c979c1 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/whole_update_double_buffer.cpp @@ -0,0 +1,10 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/whole_update_double_buffer.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/executable_graph_update.cpp b/sycl/test-e2e/Graph/RecordReplay/whole_update_subgraph.cpp similarity index 80% rename from sycl/test-e2e/Graph/RecordReplay/executable_graph_update.cpp rename to sycl/test-e2e/Graph/RecordReplay/whole_update_subgraph.cpp index e543528555ac4..dce4628d73bb3 100644 --- a/sycl/test-e2e/Graph/RecordReplay/executable_graph_update.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/whole_update_subgraph.cpp @@ -4,11 +4,9 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// Skip as executable graph update not implemented yet -// REQUIRES: NOT_YET_IMPLEMENTED +// REQUIRES: aspect-usm_shared_allocations #define GRAPH_E2E_RECORD_REPLAY -#include "../Update/whole_update_usm.cpp" +#include "../Inputs/whole_update_subgraph.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/double_buffer.cpp b/sycl/test-e2e/Graph/RecordReplay/whole_update_usm.cpp similarity index 79% rename from sycl/test-e2e/Graph/RecordReplay/double_buffer.cpp rename to sycl/test-e2e/Graph/RecordReplay/whole_update_usm.cpp index 76700d8815603..cd5afb58bfd7f 100644 --- a/sycl/test-e2e/Graph/RecordReplay/double_buffer.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/whole_update_usm.cpp @@ -4,11 +4,9 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// Skip as executable graph update not yet implemented -// REQUIRES: NOT_YET_IMPLEMENTED +// REQUIRES: aspect-usm_shared_allocations #define GRAPH_E2E_RECORD_REPLAY -#include "../Update/whole_update_double_buffer.cpp" +#include "../Inputs/whole_update_usm.cpp" diff --git a/sycl/test-e2e/Graph/Update/whole_update_delay.cpp b/sycl/test-e2e/Graph/Update/whole_update_delay.cpp deleted file mode 100644 index 25a3b6186f5fe..0000000000000 --- a/sycl/test-e2e/Graph/Update/whole_update_delay.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Tests executable graph update by introducing a delay in to the update -// transactions dependencies to check correctness of behaviour. -// TODO This test is disabled because host-tasks are not supported for graph -// updates yet. -#include "../graph_common.hpp" -#include - -int main() { - queue Queue{}; - - using T = int; - - std::vector DataA(Size), DataB(Size), DataC(Size); - std::vector HostTaskOutput(Size); - - std::iota(DataA.begin(), DataA.end(), 1); - std::iota(DataB.begin(), DataB.end(), 10); - std::iota(DataC.begin(), DataC.end(), 1000); - - auto DataA2 = DataA; - auto DataB2 = DataB; - auto DataC2 = DataC; - - std::vector ReferenceA(DataA), ReferenceB(DataB), ReferenceC(DataC); - calculate_reference_data(Iterations, Size, ReferenceA, ReferenceB, - ReferenceC); - - exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()}; - - T *PtrA = malloc_shared(Size, Queue); - T *PtrB = malloc_shared(Size, Queue); - T *PtrC = malloc_shared(Size, Queue); - T *PtrOut = malloc_shared(Size, Queue); - - Queue.copy(DataA.data(), PtrA, Size); - Queue.copy(DataB.data(), PtrB, Size); - Queue.copy(DataC.data(), PtrC, Size); - Queue.wait_and_throw(); - - // Add commands to first graph - auto NodeA = add_nodes(GraphA, Queue, Size, PtrA, PtrB, PtrC); - - // host task to induce a wait for dependencies - add_node( - Graph, Queue, - [&](handler &CGH) { - CGH.host_task([=]() { - for (size_t i = 0; i < Size; i++) { - PtrOut[i] = PtrC[i]; - } - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - }); - }, - NodeA); - - auto GraphExec = GraphA.finalize(); - - exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()}; - - T *PtrA2 = malloc_shared(Size, Queue); - T *PtrB2 = malloc_shared(Size, Queue); - T *PtrC2 = malloc_shared(Size, Queue); - - Queue.copy(DataA2.data(), PtrA2, Size); - Queue.copy(DataB2.data(), PtrB2, Size); - Queue.copy(DataC2.data(), PtrC2, Size); - Queue.wait_and_throw(); - - // Adds commands to second graph - auto NodeB = add_nodes(GraphB, Queue, Size, PtrA2, PtrB2, PtrC2); - - // host task to match the graph topology, but we don't need to sleep this - // time because there is no following update. - add_node( - Graph, Queue, - [&](handler &CGH) { - // This should be access::target::host_task but it has not been - // implemented yet. - CGH.host_task([=]() { - for (size_t i = 0; i < Size; i++) { - PtrOut[i] = PtrC2[i]; - } - }); - }, - NodeB); - - event Event; - for (unsigned n = 0; n < Iterations; n++) { - Event = Queue.submit([&](handler &CGH) { - CGH.depends_on(Event); - CGH.ext_oneapi_graph(GraphExec); - }); - } - - GraphExec.update(GraphB); - - // Execute several Iterations of the graph for 2nd set of buffers - for (unsigned n = 0; n < Iterations; n++) { - Event = Queue.submit([&](handler &CGH) { - CGH.depends_on(Event); - CGH.ext_oneapi_graph(GraphExec); - }); - } - - Queue.wait_and_throw(); - - Queue.copy(PtrA, DataA.data(), Size); - Queue.copy(PtrB, DataB.data(), Size); - Queue.copy(PtrC, DataC.data(), Size); - Queue.copy(PtrOut, HostTaskOutput.data(), Size); - - Queue.copy(PtrA2, DataA.data(), Size); - Queue.copy(PtrB2, DataB.data(), Size); - Queue.copy(PtrC2, DataC.data(), Size); - Queue.wait_and_throw(); - - free(PtrA, Queue); - free(PtrB, Queue); - free(PtrC, Queue); - free(PtrOut, Queue); - - free(PtrA2, Queue); - free(PtrB2, Queue); - free(PtrC2, Queue); - - for (size_t i = 0; i < Size; i++) { - assert(check_value(i, ReferenceA[i], DataA[i], "DataA")); - assert(check_value(i, ReferenceB[i], DataB[i], "DataB")); - assert(check_value(i, ReferenceC[i], DataC[i], "DataC")); - assert(check_value(i, ReferenceC[i], HostTaskOutput[i], "HostTaskOutput")); - - assert(check_value(i, ReferenceA[i], DataA2[i], "DataA2")); - assert(check_value(i, ReferenceB[i], DataB2[i], "DataB2")); - assert(check_value(i, ReferenceC[i], DataC2[i], "DataC2")); - } - - return 0; -} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp index 131b265eeeee0..3a47363ad0cc1 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp @@ -4,10 +4,11 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// UNSUPPORTED: opencl, level_zero + +// REQUIRES: aspect-usm_shared_allocations // Tests that whole graph update works when using dynamic parameters. + #include "../graph_common.hpp" int main() { @@ -34,7 +35,7 @@ int main() { exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()}; exp_ext::dynamic_parameter InputParam(GraphA, InputDataDevice1); - auto GraphANode = GraphA.add([&](handler &CGH) { + GraphA.add([&](handler &CGH) { CGH.set_arg(1, InputParam); CGH.single_task([=]() { for (size_t i = 0; i < Size; i++) { @@ -50,13 +51,14 @@ int main() { Queue.wait_and_throw(); for (size_t i = 0; i < Size; i++) { - assert(check_value(i, InputDataHost1[i], OutputDataHost1[i], "OutputDataHost1")); + assert(check_value(i, InputDataHost1[i], OutputDataHost1[i], + "OutputDataHost1")); } InputParam.update(InputDataDevice2); exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()}; - auto GraphBNode = GraphB.add([&](handler &CGH) { + GraphB.add([&](handler &CGH) { CGH.single_task([=]() { for (size_t i = 0; i < Size; i++) { OutputDataDevice1[i] = InputDataDevice1[i]; @@ -76,7 +78,8 @@ int main() { free(OutputDataDevice1, Queue); for (size_t i = 0; i < Size; i++) { - assert(check_value(i, InputDataHost2[i], OutputDataHost1[i], "OutputDataHost1")); + assert(check_value(i, InputDataHost2[i], OutputDataHost1[i], + "OutputDataHost1")); } return 0; diff --git a/sycl/unittests/Extensions/CommandGraph/Common.hpp b/sycl/unittests/Extensions/CommandGraph/Common.hpp index 2056846f92ac3..a2e0965572cbf 100644 --- a/sycl/unittests/Extensions/CommandGraph/Common.hpp +++ b/sycl/unittests/Extensions/CommandGraph/Common.hpp @@ -20,7 +20,6 @@ using namespace sycl; using namespace sycl::ext::oneapi; -namespace exp_ext = sycl::ext::oneapi::experimental; // Common Test fixture class CommandGraphTest : public ::testing::Test { diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index 05d077b32d556..63cd7871b5b99 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "Common.hpp" -#include "sycl/exception.hpp" using namespace sycl; using namespace sycl::ext::oneapi; @@ -176,20 +175,20 @@ TEST_F(WholeGraphUpdateTest, NoUpdates) { auto NodeA = Graph.add(EmptyKernel); auto NodeB = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeC = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeD = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeB, NodeC)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeB, NodeC)); auto UpdateNodeA = UpdateGraph.add(EmptyKernel); auto UpdateNodeB = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeC = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeD = UpdateGraph.add( EmptyKernel, - exp_ext::property::node::depends_on(UpdateNodeB, UpdateNodeC)); + experimental::property::node::depends_on(UpdateNodeB, UpdateNodeC)); auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); EXPECT_NO_THROW(GraphExec.update(UpdateGraph)); @@ -200,20 +199,20 @@ TEST_F(WholeGraphUpdateTest, MoreNodes) { auto NodeA = Graph.add(EmptyKernel); auto NodeB = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeC = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeD = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeB, NodeC)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeB, NodeC)); auto UpdateNodeA = UpdateGraph.add(EmptyKernel); auto UpdateNodeB = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeC = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeD = UpdateGraph.add( EmptyKernel, - exp_ext::property::node::depends_on(UpdateNodeB, UpdateNodeC)); + experimental::property::node::depends_on(UpdateNodeB, UpdateNodeC)); // NodeE is the extra node auto UpdateNodeE = UpdateGraph.add(EmptyKernel); @@ -226,17 +225,17 @@ TEST_F(WholeGraphUpdateTest, LessNodes) { auto NodeA = Graph.add(EmptyKernel); auto NodeB = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeC = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeD = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeB, NodeC)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeB, NodeC)); auto UpdateNodeA = UpdateGraph.add(EmptyKernel); auto UpdateNodeB = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeC = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); // NodeD is missing in the update auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); @@ -248,19 +247,19 @@ TEST_F(WholeGraphUpdateTest, ExtraEdges) { auto NodeA = Graph.add(EmptyKernel); auto NodeB = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeC = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeD = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeB, NodeC)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeB, NodeC)); auto UpdateNodeA = UpdateGraph.add(EmptyKernel); auto UpdateNodeB = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeC = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeD = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on( + EmptyKernel, experimental::property::node::depends_on( UpdateNodeA, UpdateNodeB, UpdateNodeC /* Extra Edge */)); auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); @@ -272,46 +271,49 @@ TEST_F(WholeGraphUpdateTest, MissingEdges) { auto NodeA = Graph.add(EmptyKernel); auto NodeB = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeC = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeD = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeB, NodeC)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeB, NodeC)); auto UpdateNodeA = UpdateGraph.add(EmptyKernel); auto UpdateNodeB = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeC = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeD = UpdateGraph.add( EmptyKernel, - exp_ext::property::node::depends_on(/* Missing Edge */ UpdateNodeB)); + experimental::property::node::depends_on(/* Missing Edge */ UpdateNodeB)); auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception); } -// FIXME TODO Is this an error or not? TEST_F(WholeGraphUpdateTest, WrongOrderEdges) { // Test that using an update graph with edges added in a different order // does not result in an error. auto NodeA = Graph.add(EmptyKernel); - auto NodeB = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); - auto NodeC = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); - auto NodeD = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeB, NodeC)); + auto NodeB = Graph.add(EmptyKernel); + auto NodeC = Graph.add(EmptyKernel); + auto NodeD = Graph.add(EmptyKernel); + + Graph.make_edge(NodeA, NodeB); + Graph.make_edge(NodeA, NodeC); + Graph.make_edge(NodeB, NodeD); + Graph.make_edge(NodeC, NodeD); auto UpdateNodeA = UpdateGraph.add(EmptyKernel); - auto UpdateNodeB = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); - auto UpdateNodeC = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); - auto UpdateNodeD = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on( - UpdateNodeC, UpdateNodeB /* Reversed Edges */)); + auto UpdateNodeB = UpdateGraph.add(EmptyKernel); + auto UpdateNodeC = UpdateGraph.add(EmptyKernel); + auto UpdateNodeD = UpdateGraph.add(EmptyKernel); + + UpdateGraph.make_edge(UpdateNodeA, UpdateNodeB); + UpdateGraph.make_edge(UpdateNodeA, UpdateNodeC); + // Create the edge C->D before B->D, which is the reverse order of `Graph`. + UpdateGraph.make_edge(UpdateNodeC, UpdateNodeD); + UpdateGraph.make_edge(UpdateNodeB, UpdateNodeD); auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); EXPECT_NO_THROW(GraphExec.update(UpdateGraph)); @@ -321,30 +323,30 @@ TEST_F(WholeGraphUpdateTest, UnsupportedNodeType) { // Test that using an update graph that contains unsupported node types // results in an error. buffer Buffer{range<1>{1}}; - +wro auto NodeA = Graph.add(EmptyKernel); auto NodeB = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeC = - Graph.add(EmptyKernel, exp_ext::property::node::depends_on(NodeA)); + Graph.add(EmptyKernel, experimental::property::node::depends_on(NodeA)); auto NodeD = Graph.add( [&](handler &CGH) { auto Acc = Buffer.get_access(CGH); CGH.fill(Acc, 1); }, - exp_ext::property::node::depends_on(NodeB, NodeC)); + experimental::property::node::depends_on(NodeB, NodeC)); auto UpdateNodeA = UpdateGraph.add(EmptyKernel); auto UpdateNodeB = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeC = UpdateGraph.add( - EmptyKernel, exp_ext::property::node::depends_on(UpdateNodeA)); + EmptyKernel, experimental::property::node::depends_on(UpdateNodeA)); auto UpdateNodeD = Graph.add( [&](handler &CGH) { auto Acc = Buffer.get_access(CGH); CGH.fill(Acc, 1); }, - exp_ext::property::node::depends_on(UpdateNodeB, UpdateNodeC)); + experimental::property::node::depends_on(UpdateNodeB, UpdateNodeC)); auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception);