diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_in_order_queue_events.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_in_order_queue_events.asciidoc new file mode 100644 index 0000000000000..012e9efd7de3f --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_in_order_queue_events.asciidoc @@ -0,0 +1,151 @@ += sycl_ext_oneapi_in_order_queue_events + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2024-2024 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* + + +== Overview + +SYCL 2020 in-order queues allow for simple control of submission ordering, i.e. +commands are executed in the order they are submitted. This extension adds two +additional APIs for controlling in-order queues: Getting the event from the last +command submission into the queue and setting an external event as an implicit +dependence on the next command submitted to the queue. + +This extension exists to solve a specific problem, and a general solution is +still being evaluated. It is not recommended for general usage. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_IN_ORDER_QUEUE_EVENTS` to one of the values defined in +the table below. Applications can test for the existence of this macro to +determine if the implementation supports this feature, or applications can test +the macro's value to determine which of the extension's features the +implementation supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + +=== New SYCL queue APIs + +This extension adds the following new APIs to the existing `sycl::queue` class: + +[source, c++] +---- +namespace sycl { + +class queue { + ... + + event ext_oneapi_get_last_event() const { /*...*/ } + + void ext_oneapi_set_external_event(const event &external_event) { /*...*/ } +} + +} // namespace sycl +---- + +These new APIs have the following behaviour: + +-- +[options="header"] +|==== +| Function Definition | Description +a| +[source, c++] +---- +event ext_oneapi_get_last_event() const; +---- +| Returns an event representing the execution of the last command submitted to +the queue. + +Calls to this member function throw a `sycl::exception` with `errc::invalid` if +the queue does not have the `property::queue::in_order` property. + +Calls to this member function throw a `sycl::exception` with `errc::invalid` if +the queue has the `ext::oneapi::property::queue::discard_events` property from +the +link:../supported/sycl_ext_oneapi_discard_queue_events.asciidoc[sycl_ext_oneapi_discard_queue_events extension]. + +a| +[source, c++] +---- +void ext_oneapi_set_external_event(const event &externalEvent); +---- +| Sets an event to be used as an additional dependency of the next command +submission to the queue. Subsequent calls to this function will overwrite the +event of the previous call, resulting in only the `externalEvent` from the last +call to this function being a dependency of the next command submission. + +This is equivalent to calling `handler::depends_on()` in a command submission +with the `externalEvent` from the most recent call to this member function since +the previous command submission to the same queue. + +Calls to this member function throw a `sycl::exception` with `errc::invalid` if +the queue does not have the `property::queue::in_order` property. + +Calls to this member function throw a `sycl::exception` with `errc::invalid` if +the queue has the `ext::oneapi::property::queue::discard_events` property from +the +link:../supported/sycl_ext_oneapi_discard_queue_events.asciidoc[sycl_ext_oneapi_discard_queue_events extension]. +|==== +-- diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 37a6af52b4064..23008e75b80fb 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -2783,6 +2783,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { pi_native_handle getNative(int32_t &NativeHandleDesc) const; + event ext_oneapi_get_last_event() const; + + void ext_oneapi_set_external_event(const event &external_event); + private: std::shared_ptr impl; queue(std::shared_ptr impl) : impl(impl) {} diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index c3fd4cd5071c3..2551f02ab1b55 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -61,6 +61,20 @@ static event createDiscardedEvent() { return createSyclObjFromImpl(EventImpl); } +const std::vector & +queue_impl::getExtendDependencyList(const std::vector &DepEvents, + std::vector &MutableVec) { + if (isInOrder()) { + std::optional ExternalEvent = popExternalEvent(); + if (ExternalEvent) { + MutableVec = DepEvents; + MutableVec.push_back(*ExternalEvent); + return MutableVec; + } + } + return DepEvents; +} + event queue_impl::memset(const std::shared_ptr &Self, void *Ptr, int Value, size_t Count, const std::vector &DepEvents) { @@ -108,9 +122,13 @@ event queue_impl::memset(const std::shared_ptr &Self, if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask) MLastEvent.wait(); + std::vector MutableDepEvents; + const std::vector &ExpandedDepEvents = + getExtendDependencyList(DepEvents, MutableDepEvents); + auto EventImpl = detail::getSyclObjImpl(ResEvent); MemoryManager::fill_usm(Ptr, Self, Count, Value, - getOrWaitEvents(DepEvents, MContext), + getOrWaitEvents(ExpandedDepEvents, MContext), &EventImpl->getHandleRef(), EventImpl); if (MContext->is_host()) @@ -201,9 +219,13 @@ event queue_impl::memcpy(const std::shared_ptr &Self, if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask) MLastEvent.wait(); + std::vector MutableDepEvents; + const std::vector &ExpandedDepEvents = + getExtendDependencyList(DepEvents, MutableDepEvents); + auto EventImpl = detail::getSyclObjImpl(ResEvent); MemoryManager::copy_usm(Src, Self, Count, Dest, - getOrWaitEvents(DepEvents, MContext), + getOrWaitEvents(ExpandedDepEvents, MContext), &EventImpl->getHandleRef(), EventImpl); if (MContext->is_host()) @@ -244,9 +266,13 @@ event queue_impl::mem_advise(const std::shared_ptr &Self, if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask) MLastEvent.wait(); + std::vector MutableDepEvents; + const std::vector &ExpandedDepEvents = + getExtendDependencyList(DepEvents, MutableDepEvents); + auto EventImpl = detail::getSyclObjImpl(ResEvent); MemoryManager::advise_usm(Ptr, Self, Length, Advice, - getOrWaitEvents(DepEvents, MContext), + getOrWaitEvents(ExpandedDepEvents, MContext), &EventImpl->getHandleRef(), EventImpl); if (MContext->is_host()) @@ -288,11 +314,15 @@ event queue_impl::memcpyToDeviceGlobal( if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask) MLastEvent.wait(); + std::vector MutableDepEvents; + const std::vector &ExpandedDepEvents = + getExtendDependencyList(DepEvents, MutableDepEvents); + auto EventImpl = detail::getSyclObjImpl(ResEvent); - MemoryManager::copy_to_device_global(DeviceGlobalPtr, IsDeviceImageScope, - Self, NumBytes, Offset, Src, - getOrWaitEvents(DepEvents, MContext), - &EventImpl->getHandleRef(), EventImpl); + MemoryManager::copy_to_device_global( + DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src, + getOrWaitEvents(ExpandedDepEvents, MContext), + &EventImpl->getHandleRef(), EventImpl); if (MContext->is_host()) return MDiscardEvents ? createDiscardedEvent() : event(); @@ -333,11 +363,15 @@ event queue_impl::memcpyFromDeviceGlobal( if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask) MLastEvent.wait(); + std::vector MutableDepEvents; + const std::vector &ExpandedDepEvents = + getExtendDependencyList(DepEvents, MutableDepEvents); + auto EventImpl = detail::getSyclObjImpl(ResEvent); MemoryManager::copy_from_device_global( DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest, - getOrWaitEvents(DepEvents, MContext), &EventImpl->getHandleRef(), - EventImpl); + getOrWaitEvents(ExpandedDepEvents, MContext), + &EventImpl->getHandleRef(), EventImpl); if (MContext->is_host()) return MDiscardEvents ? createDiscardedEvent() : event(); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a5d556a7f60a5..0c27a177dbf1a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -716,6 +716,22 @@ class queue_impl { unsigned long long getQueueID() { return MQueueID; } + void setExternalEvent(const event &Event) { + std::lock_guard Lock(MInOrderExternalEventMtx); + MInOrderExternalEvent = Event; + } + + std::optional popExternalEvent() { + std::lock_guard Lock(MInOrderExternalEventMtx); + std::optional Result = std::nullopt; + std::swap(Result, MInOrderExternalEvent); + return Result; + } + + const std::vector & + getExtendDependencyList(const std::vector &DepEvents, + std::vector &MutableVec); + protected: // Hook to the scheduler to clean up any fusion command held on destruction. void cleanup_fusion_cmd(); @@ -731,8 +747,8 @@ class queue_impl { }; // Accessing and changing of an event isn't atomic operation. - // Hence, here is the lock for thread-safety. - std::lock_guard Lock{MLastEventMtx}; + // Hence, here is are locks for thread-safety. + std::lock_guard LastEventLock{MLastEventMtx}; if (MLastCGType == CG::CGTYPE::None) MLastCGType = Type; @@ -744,6 +760,13 @@ class queue_impl { if (NeedSeparateDependencyMgmt) Handler.depends_on(MLastEvent); + // If there is an external event set, add it as a dependency and clear it. + // We do not need to hold the lock as MLastEventMtx will ensure the last + // event reflects the corresponding external event dependence as well. + std::optional ExternalEvent = popExternalEvent(); + if (ExternalEvent) + Handler.depends_on(*ExternalEvent); + EventRet = Handler.finalize(); MLastEvent = EventRet; @@ -894,6 +917,13 @@ class queue_impl { // the fallback implementation of profiling info bool MFallbackProfiling = false; + // This event can be optionally provided by users for in-order queues to add + // an additional dependency for the subsequent submission in to the queue. + // Access to the event should be guarded with MInOrderExternalEventMtx. + // NOTE: std::optional must not be exposed in the ABI. + std::optional MInOrderExternalEvent; + mutable std::mutex MInOrderExternalEventMtx; + public: // Queue constructed with the discard_events property const bool MDiscardEvents; diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index eee96f9ffa4f8..76b1bdfe11a89 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -97,6 +97,7 @@ inline namespace _V1 { #define SYCL_EXT_INTEL_CACHE_CONTROLS 1 #define SYCL_EXT_INTEL_FP_CONTROL 1 #define SYCL_EXT_ONEAPI_NON_UNIFORM_GROUPS 1 +#define SYCL_EXT_ONEAPI_IN_ORDER_QUEUE_EVENTS 1 #ifndef __has_include #define __has_include(x) 0 diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 25f13a402c561..109e6396a0341 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -300,5 +300,31 @@ bool queue::ext_codeplay_supports_fusion() const { ext::codeplay::experimental::property::queue::enable_fusion>(); } +event queue::ext_oneapi_get_last_event() const { + if (!is_in_order()) + throw sycl::exception( + make_error_code(errc::invalid), + "ext_oneapi_get_last_event() can only be called on in-order queues."); + if (impl->MDiscardEvents) + throw sycl::exception( + make_error_code(errc::invalid), + "ext_oneapi_get_last_event() cannot be called on queues with the " + "ext::oneapi::property::queue::discard_events property."); + return impl->getLastEvent(); +} + +void queue::ext_oneapi_set_external_event(const event &external_event) { + if (!is_in_order()) + throw sycl::exception(make_error_code(errc::invalid), + "ext_oneapi_set_external_event() can only be called " + "on in-order queues."); + if (impl->MDiscardEvents) + throw sycl::exception( + make_error_code(errc::invalid), + "ext_oneapi_set_external_event() cannot be called on queues with the " + "ext::oneapi::property::queue::discard_events property."); + return impl->setExternalEvent(external_event); +} + } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp new file mode 100644 index 0000000000000..3393202b5a370 --- /dev/null +++ b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp @@ -0,0 +1,58 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Tests the ext_oneapi_get_last_event extension member on in-order queues. +// NOTE: The extension does not guarantee that the SYCL events returned by this +// extension API are equal to the ones returned by the latest submission, +// only that the underlying native events are. Currently DPC++ implements +// this in a way that guarantees it, but this can change in the future. +// If it changes then so should this test. + +#include +#include + +template +int Check(const sycl::queue &Q, const char *CheckName, const F &CheckFunc) { + sycl::event E = CheckFunc(); + if (E != Q.ext_oneapi_get_last_event()) { + std::cout << "Failed " << CheckName << std::endl; + return 1; + } + return 0; +} + +int main() { + sycl::queue Q{{sycl::property::queue::in_order{}}}; + + int Failed = 0; + + Failed += Check(Q, "single_task", [&]() { return Q.single_task([]() {}); }); + + Failed += Check(Q, "parallel_for", + [&]() { return Q.parallel_for(32, [](sycl::id<1>) {}); }); + + Failed += Check(Q, "host_task", [&]() { + return Q.submit([&](sycl::handler &CGH) { CGH.host_task([]() {}); }); + }); + + constexpr size_t N = 64; + int *Data1 = sycl::malloc_shared(N, Q); + int *Data2 = sycl::malloc_shared(N, Q); + + Failed += Check(Q, "fill", [&]() { return Q.fill(Data1, 0, N); }); + + Failed += + Check(Q, "memset", [&]() { return Q.memset(Data1, 0, N * sizeof(int)); }); + + Failed += Check(Q, "memcpy", + [&]() { return Q.memcpy(Data1, Data2, N * sizeof(int)); }); + + Failed += Check(Q, "copy", [&]() { return Q.memcpy(Data1, Data2, N); }); + + Q.wait_and_throw(); + + sycl::free(Data1, Q); + sycl::free(Data2, Q); + + return Failed; +} diff --git a/sycl/test-e2e/InOrderEventsExt/set_external_event.cpp b/sycl/test-e2e/InOrderEventsExt/set_external_event.cpp new file mode 100644 index 0000000000000..45e5815606dbe --- /dev/null +++ b/sycl/test-e2e/InOrderEventsExt/set_external_event.cpp @@ -0,0 +1,54 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Tests the ext_oneapi_set_external_event extension member on in-order queues. + +#include +#include + +constexpr size_t N = 1024; + +int main() { + sycl::context Ctx; + sycl::device Dev = Ctx.get_devices()[0]; + + sycl::queue Q1{Ctx, Dev, {sycl::property::queue::in_order{}}}; + sycl::queue Q2{Ctx, Dev, {sycl::property::queue::in_order{}}}; + + int *DevData = sycl::malloc_shared(N, Dev, Ctx); + int *HostData = (int *)malloc(N * sizeof(int) * 10); + + for (size_t I = 0; I < 10; ++I) { + Q1.fill(DevData, 0, N); + sycl::event E1 = Q1.parallel_for( + N, [=](sycl::item<1> Idx) { DevData[Idx] = 42 + Idx[0] + I; }); + + Q2.ext_oneapi_set_external_event(E1); + sycl::event E2 = + Q2.parallel_for(N, [=](sycl::item<1> Idx) { ++DevData[Idx]; }); + + Q1.ext_oneapi_set_external_event(E2); + Q1.copy(DevData, HostData + N * I, N); + } + + Q1.wait_and_throw(); + + int Failures = 0; + for (size_t I = 0; I < 10; ++I) { + for (size_t J = 0; J < N; ++J) { + int Expected = 43 + J + I; + int Actual = HostData[N * I + J]; + if (Expected != Actual) { + std::cout << "Result not matching the expected value at index {" << I + << ", " << J << "}: " << Expected << " != " << Actual + << std::endl; + ++Failures; + } + } + } + + sycl::free(DevData, Ctx); + free(HostData); + + return Failures; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6a3bbbab13a5e..e3097a79d6203 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3776,6 +3776,7 @@ _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKSt6vectorINS0_5eventESaIS3_EERKNS0_6detail13code_locationE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE +_ZN4sycl3_V15queue29ext_oneapi_set_external_eventERKNS0_5eventE _ZN4sycl3_V15queue6memcpyEPvPKvmNS0_5eventERKNS0_6detail13code_locationE _ZN4sycl3_V15queue6memcpyEPvPKvmRKNS0_6detail13code_locationE _ZN4sycl3_V15queue6memcpyEPvPKvmRKSt6vectorINS0_5eventESaIS6_EERKNS0_6detail13code_locationE @@ -4265,6 +4266,7 @@ _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue4cuda18use_default_streamEE _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue8in_orderEEEbv _ZNK4sycl3_V15queue16ext_oneapi_emptyEv _ZNK4sycl3_V15queue20ext_oneapi_get_stateEv +_ZNK4sycl3_V15queue25ext_oneapi_get_last_eventEv _ZNK4sycl3_V15queue28ext_codeplay_supports_fusionEv _ZNK4sycl3_V15queue3getEv _ZNK4sycl3_V15queue7is_hostEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f20eb9cede900..87139b46f86c5 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1045,6 +1045,7 @@ ?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_kernel@kernel_bundle_plain@detail@_V1@sycl@@QEAA?AVkernel@34@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z +?ext_oneapi_get_last_event@queue@_V1@sycl@@QEBA?AVevent@23@XZ ?ext_oneapi_get_state@queue@_V1@sycl@@QEBA?AW4queue_state@experimental@oneapi@ext@23@XZ ?ext_oneapi_graph@handler@_V1@sycl@@QEAAXV?$command_graph@$00@experimental@oneapi@ext@23@@Z ?ext_oneapi_graph@queue@_V1@sycl@@QEAA?AVevent@23@V?$command_graph@$00@experimental@oneapi@ext@23@AEBUcode_location@detail@23@@Z @@ -1069,6 +1070,7 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVqueue@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vstream@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVstream@34@@Z +?ext_oneapi_set_external_event@queue@_V1@sycl@@QEAAXAEBVevent@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@@Z ?ext_oneapi_signal_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@AEBUcode_location@detail@23@@Z ?ext_oneapi_signal_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z