Skip to content

Commit

Permalink
[SYCL] Make handler.hpp independent from kernel_bundle.hpp (#16012)
Browse files Browse the repository at this point in the history
I was looking into ways of splitting `sycl.hpp` into different
finer-grained headers (with the intention to propose such split as a KHR
extension/SYCL-Next thing) and I decided to try and see what is the
impact of different header files on the compilation time.

I started my investigation with `kernel_bundle.hpp`. Looking at
[zjin-lcf/HeCBench](https://github.com/zjin-lcf/HeCBench), I do not see
any benchmarks that use it, so it seems like a good candidate for being
an opt-in header.

To do the measurements I decided to drop `#include <kernel_bundle.hpp>`
from `sycl.hpp` and then compare compilation time of two empty files
including `sycl.hpp` (the modified one and the original one).
Apparently, it is not that easy to drop an include, because there are so
many inter-dependencies on it. I succeeded and I see ~200ms device
compilation time improvement when `kernel_bundle.hpp` is not included at
all.

However, for my experiments I made some other hacks which I'm unable to
push into the repo, like dropping backend-specific headers as well.
Specifically, L0 backend interop has `kernel_bundle` as a struct member
of some of input or return types which requires a full definition.
SYCL spec ([6.3.7. Adding a
backend](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_adding_a_backend))
allows backend interop headers to be put into separate headers and I
think that we should actually use this opportunity in the future and
drop them (somehow without many regressions) from `sycl.hpp`.
  • Loading branch information
AlexeySachkov authored Dec 2, 2024
1 parent ac207a1 commit 13ff78e
Showing 81 changed files with 150 additions and 52 deletions.
4 changes: 3 additions & 1 deletion sycl/include/sycl/backend.hpp
Original file line number Diff line number Diff line change
@@ -23,7 +23,6 @@
#include <sycl/exception.hpp> // for make_error_code
#include <sycl/feature_test.hpp> // for SYCL_BACKEND_OP...
#include <sycl/image.hpp> // for image, image_al...
#include <sycl/kernel_bundle.hpp> // for kernel_bundle
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
#include <sycl/platform.hpp> // for platform, get_n...
#include <sycl/queue.hpp> // for queue, get_native
@@ -56,6 +55,7 @@
namespace sycl {
inline namespace _V1 {

template <bundle_state State> class kernel_bundle;
class property_list;

namespace detail {
@@ -141,13 +141,15 @@ auto get_native(const queue &Obj) -> backend_return_t<BackendName, queue> {
int32_t IsImmCmdList;
ur_native_handle_t Handle = Obj.getNative(IsImmCmdList);
backend_return_t<BackendName, queue> RetVal;
#if SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO
if constexpr (BackendName == backend::ext_oneapi_level_zero)
RetVal = IsImmCmdList
? backend_return_t<BackendName, queue>{reinterpret_cast<
ze_command_list_handle_t>(Handle)}
: backend_return_t<BackendName, queue>{
reinterpret_cast<ze_command_queue_handle_t>(Handle)};
else
#endif
RetVal = reinterpret_cast<backend_return_t<BackendName, queue>>(Handle);

return RetVal;
1 change: 0 additions & 1 deletion sycl/include/sycl/detail/backend_traits_cuda.hpp
Original file line number Diff line number Diff line change
@@ -18,7 +18,6 @@
#include <sycl/detail/backend_traits.hpp>
#include <sycl/device.hpp>
#include <sycl/event.hpp>
#include <sycl/kernel_bundle.hpp>
#include <sycl/queue.hpp>

typedef int CUdevice;
1 change: 0 additions & 1 deletion sycl/include/sycl/detail/backend_traits_hip.hpp
Original file line number Diff line number Diff line change
@@ -18,7 +18,6 @@
#include <sycl/detail/backend_traits.hpp>
#include <sycl/device.hpp>
#include <sycl/event.hpp>
#include <sycl/kernel_bundle.hpp>
#include <sycl/queue.hpp>

typedef int HIPdevice;
4 changes: 2 additions & 2 deletions sycl/include/sycl/detail/backend_traits_level_zero.hpp
Original file line number Diff line number Diff line change
@@ -21,14 +21,12 @@
#include <sycl/device.hpp> // for device
#include <sycl/event.hpp> // for event
#include <sycl/ext/oneapi/backend/level_zero_ownership.hpp> // for ownership
#include <sycl/handler.hpp> // for buffer
#include <sycl/image.hpp> // for image
#include <sycl/kernel.hpp> // for kernel
#include <sycl/kernel_bundle.hpp> // for kernel_b...
#include <sycl/kernel_bundle_enums.hpp> // for bundle_s...
#include <sycl/platform.hpp> // for platform
#include <sycl/property_list.hpp> // for property...
#include <sycl/queue.hpp> // for queue
#include <sycl/range.hpp> // for range

#include <variant> // for variant
@@ -46,6 +44,8 @@ typedef struct _ze_module_handle_t *ze_module_handle_t;

namespace sycl {
inline namespace _V1 {
class queue;

namespace detail {

// Forward declarations
19 changes: 12 additions & 7 deletions sycl/include/sycl/detail/backend_traits_opencl.hpp
Original file line number Diff line number Diff line change
@@ -22,17 +22,20 @@
#include <sycl/detail/ur.hpp> // for assertion and ur handles
#include <sycl/device.hpp> // for device
#include <sycl/event.hpp> // for event
#include <sycl/handler.hpp> // for buffer
#include <sycl/kernel.hpp> // for kernel
#include <sycl/kernel_bundle.hpp> // for kernel_bundle
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
#include <sycl/platform.hpp> // for platform
#include <sycl/queue.hpp> // for queue

#include <vector> // for vector

namespace sycl {
inline namespace _V1 {

template <bundle_state State> class kernel_bundle;
class queue;
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
class buffer;

namespace detail {

// TODO the interops for context, device, event, platform and program
@@ -54,13 +57,15 @@ template <> struct interop<backend::opencl, platform> {
using type = cl_platform_id;
};

template <typename DataT, int Dimensions, typename AllocatorT>
struct BackendInput<backend::opencl, buffer<DataT, Dimensions, AllocatorT>> {
template <typename DataT, int Dimensions, typename AllocatorT, typename Enable>
struct BackendInput<backend::opencl,
buffer<DataT, Dimensions, AllocatorT, Enable>> {
using type = cl_mem;
};

template <typename DataT, int Dimensions, typename AllocatorT>
struct BackendReturn<backend::opencl, buffer<DataT, Dimensions, AllocatorT>> {
template <typename DataT, int Dimensions, typename AllocatorT, typename Enable>
struct BackendReturn<backend::opencl,
buffer<DataT, Dimensions, AllocatorT, Enable>> {
using type = std::vector<cl_mem>;
};

4 changes: 3 additions & 1 deletion sycl/include/sycl/ext/oneapi/backend/level_zero.hpp
Original file line number Diff line number Diff line change
@@ -24,7 +24,6 @@
#include <sycl/ext/oneapi/backend/level_zero_ownership.hpp> // for ownership
#include <sycl/image.hpp> // for image
#include <sycl/kernel.hpp> // for kernel
#include <sycl/kernel_bundle.hpp> // for kernel_bu...
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
#include <sycl/platform.hpp> // for platform
#include <sycl/properties/image_properties.hpp> // for image
@@ -39,6 +38,9 @@

namespace sycl {
inline namespace _V1 {

template <bundle_state State> class kernel_bundle;

namespace ext::oneapi::level_zero::detail {
__SYCL_EXPORT device make_device(const platform &Platform,
ur_native_handle_t NativeHandle);
Original file line number Diff line number Diff line change
@@ -19,7 +19,6 @@
#include <sycl/detail/backend_traits.hpp>
#include <sycl/device.hpp>
#include <sycl/event.hpp>
#include <sycl/kernel_bundle.hpp>
#include <sycl/queue.hpp>

#include <vector>
10 changes: 10 additions & 0 deletions sycl/include/sycl/ext/oneapi/get_kernel_info.hpp
Original file line number Diff line number Diff line change
@@ -11,10 +11,20 @@
#include <sycl/detail/export.hpp>
#include <sycl/detail/info_desc_helpers.hpp>
#include <sycl/device.hpp>
#include <sycl/kernel_bundle_enums.hpp>
#include <sycl/queue.hpp>

#include <vector>

namespace sycl {
inline namespace _V1 {

template <bundle_state State> class kernel_bundle;

template <typename KernelName, bundle_state State>
kernel_bundle<State> get_kernel_bundle(const context &,
const std::vector<device> &);

namespace ext::oneapi {

template <typename KernelName, typename Param>
5 changes: 4 additions & 1 deletion sycl/include/sycl/ext/oneapi/owner_less.hpp
Original file line number Diff line number Diff line change
@@ -16,7 +16,6 @@
#include <sycl/event.hpp> // for event
#include <sycl/ext/oneapi/weak_object.hpp> // for weak_object
#include <sycl/kernel.hpp> // for kernel
#include <sycl/kernel_bundle.hpp> // for kernel_id
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
#include <sycl/platform.hpp> // for platform
#include <sycl/properties/image_properties.hpp> // for sampled_i...
@@ -25,6 +24,10 @@

namespace sycl {
inline namespace _V1 {
class kernel_id;
template <bundle_state State> class kernel_bundle;
template <bundle_state State> class device_image;

namespace ext::oneapi {

namespace detail {
32 changes: 6 additions & 26 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
@@ -43,7 +43,6 @@
#include <sycl/id.hpp>
#include <sycl/item.hpp>
#include <sycl/kernel.hpp>
#include <sycl/kernel_bundle.hpp>
#include <sycl/kernel_bundle_enums.hpp>
#include <sycl/kernel_handler.hpp>
#include <sycl/nd_item.hpp>
@@ -141,6 +140,7 @@ inline namespace _V1 {

// Forward declaration

template <bundle_state State> class kernel_bundle;
class handler;
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
class buffer;
@@ -162,6 +162,7 @@ class graph_impl;
} // namespace ext::oneapi::experimental::detail
namespace detail {

class kernel_bundle_impl;
class work_group_memory_impl;
class handler_impl;
class kernel_impl;
@@ -1710,36 +1711,15 @@ class __SYCL_EXPORT handler {
handler &operator=(const handler &) = delete;
handler &operator=(handler &&) = delete;

// Out-of-class definition within kernel_bundle.hpp
template <auto &SpecName>
void set_specialization_constant(
typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {

setStateSpecConstSet();

std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
getOrInsertHandlerKernelBundle(/*Insert=*/true);

detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
KernelBundleImplPtr)
.set_specialization_constant<SpecName>(Value);
}
typename std::remove_reference_t<decltype(SpecName)>::value_type Value);

// Out-of-class definition within kernel_bundle.hpp
template <auto &SpecName>
typename std::remove_reference_t<decltype(SpecName)>::value_type
get_specialization_constant() const {

if (isStateExplicitKernelBundle())
throw sycl::exception(make_error_code(errc::invalid),
"Specialization constants cannot be read after "
"explicitly setting the used kernel bundle");

std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
getOrInsertHandlerKernelBundle(/*Insert=*/true);

return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
KernelBundleImplPtr)
.get_specialization_constant<SpecName>();
}
get_specialization_constant() const;

void
use_kernel_bundle(const kernel_bundle<bundle_state::executable> &ExecBundle);
36 changes: 34 additions & 2 deletions sycl/include/sycl/kernel_bundle.hpp
Original file line number Diff line number Diff line change
@@ -14,8 +14,9 @@
#include <sycl/detail/kernel_desc.hpp> // for get_spec_constant_symboli...
#include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
#include <sycl/detail/string_view.hpp>
#include <sycl/detail/ur.hpp> // for cast
#include <sycl/device.hpp> // for device
#include <sycl/detail/ur.hpp> // for cast
#include <sycl/device.hpp> // for device
#include <sycl/handler.hpp>
#include <sycl/kernel.hpp> // for kernel, kernel_bundle
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
#include <sycl/property_list.hpp> // for property_list
@@ -1129,6 +1130,37 @@ build(kernel_bundle<bundle_state::ext_oneapi_source> &SourceKB,

} // namespace ext::oneapi::experimental

template <auto &SpecName>
void handler::set_specialization_constant(
typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {

setStateSpecConstSet();

std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
getOrInsertHandlerKernelBundle(/*Insert=*/true);

detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
KernelBundleImplPtr)
.set_specialization_constant<SpecName>(Value);
}

template <auto &SpecName>
typename std::remove_reference_t<decltype(SpecName)>::value_type
handler::get_specialization_constant() const {

if (isStateExplicitKernelBundle())
throw sycl::exception(make_error_code(errc::invalid),
"Specialization constants cannot be read after "
"explicitly setting the used kernel bundle");

std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
getOrInsertHandlerKernelBundle(/*Insert=*/true);

return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
KernelBundleImplPtr)
.get_specialization_constant<SpecName>();
}

} // namespace _V1
} // namespace sycl

1 change: 1 addition & 0 deletions sycl/include/syclcompat/util.hpp
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@

#include <sycl/atomic_ref.hpp>
#include <sycl/group_barrier.hpp>
#include <sycl/kernel_bundle.hpp>

#include <syclcompat/math.hpp>
#include <syclcompat/memory.hpp>
1 change: 1 addition & 0 deletions sycl/test-e2e/AmdNvidiaJIT/kernel_and_bundle.cpp
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
// that otherwise does not get run.

#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>
#include <sycl/specialization_id.hpp>

constexpr size_t Size = 16;
1 change: 1 addition & 0 deletions sycl/test-e2e/Basic/backend_info.cpp
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@

#include <iostream>
#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

using namespace sycl;

1 change: 1 addition & 0 deletions sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@

#include <iostream>
#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

#include <algorithm>
#include <vector>
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@

#include <iostream>
#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

#include <algorithm>
#include <vector>
2 changes: 2 additions & 0 deletions sycl/test-e2e/Basic/kernel_info.cpp
Original file line number Diff line number Diff line change
@@ -11,6 +11,8 @@

#include <cassert>
#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

#include <sycl/ext/oneapi/get_kernel_info.hpp>

using namespace sycl;
2 changes: 2 additions & 0 deletions sycl/test-e2e/Basic/kernel_info_attr.cpp
Original file line number Diff line number Diff line change
@@ -18,6 +18,8 @@

#include <cassert>
#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

#include <sycl/ext/oneapi/get_kernel_info.hpp>

using namespace sycl;
6 changes: 3 additions & 3 deletions sycl/test-e2e/Basic/kernel_max_wg_size.cpp
Original file line number Diff line number Diff line change
@@ -9,11 +9,11 @@
// for Intel GPU devices and only when using the Level Zero backend or OpenCL
// backend.

// clang-format off
#include <sycl/detail/core.hpp>
#include <sycl/usm.hpp>

#include <sycl/ext/intel/experimental/grf_size_properties.hpp>
// clang-format on
#include <sycl/kernel_bundle.hpp>
#include <sycl/usm.hpp>

using namespace sycl;

1 change: 1 addition & 0 deletions sycl/test-e2e/Basic/large-range.cpp
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
#include <numeric>
#include <sycl/atomic_ref.hpp>
#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>
#include <sycl/specialization_id.hpp>

using namespace sycl;
1 change: 1 addition & 0 deletions sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
// RUN: %{run} %t.out

#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

#include <cassert>
#include <cstdint>
1 change: 1 addition & 0 deletions sycl/test-e2e/DeviceCodeSplit/Inputs/split-per-source.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

class File1Kern1;
class File1Kern2;
1 change: 1 addition & 0 deletions sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
// XFAIL: hip_nvidia

#include <sycl/detail/core.hpp>
#include <sycl/kernel_bundle.hpp>

class Kern1;
class Kern2;
Loading

0 comments on commit 13ff78e

Please sign in to comment.