From 23167d00c474bc1f50038a8d3aae3c1e08859f23 Mon Sep 17 00:00:00 2001 From: Alberto Cabrera Date: Thu, 28 Sep 2023 13:57:28 +0100 Subject: [PATCH] [SYCL][COMPAT] Updated device to support memory queries. Minor fixes. --- sycl/doc/syclcompat/README.md | 217 ++++++--- sycl/include/syclcompat/defs.hpp | 42 +- sycl/include/syclcompat/device.hpp | 317 ++++++++++++-- sycl/include/syclcompat/kernel.hpp | 411 +++++++++++++++++- sycl/test-e2e/syclcompat/defs.cpp | 26 +- sycl/test-e2e/syclcompat/device/device.cpp | 320 ++++++++------ .../syclcompat/device/device_threaded.cpp | 53 ++- .../syclcompat/id_query/id_query_fixt.hpp | 6 +- sycl/test-e2e/syclcompat/launch/launch.cpp | 5 +- .../syclcompat/launch/launch_fixt.hpp | 4 +- 10 files changed, 1116 insertions(+), 285 deletions(-) diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index 00b855e9bbbdc..3d909d47f9c48 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -72,9 +72,9 @@ namespace syclcompat { class dim3 { public: const size_t x, y, z; - constexpr dim3(const sycl::range<3> &r); - constexpr dim3(const sycl::range<2> &r); - constexpr dim3(const sycl::range<1> &r); + dim3(const sycl::range<3> &r); + dim3(const sycl::range<2> &r); + dim3(const sycl::range<1> &r); constexpr dim3(size_t x, size_t y = 1, size_t z = 1); constexpr size_t size(); @@ -85,9 +85,9 @@ public: }; // Element-wise operators -dim3 operator*(const dim3 &a, const dim3 &b); -dim3 operator+(const dim3 &a, const dim3 &b); -dim3 operator-(const dim3 &a, const dim3 &b); +inline dim3 operator*(const dim3 &a, const dim3 &b); +inline dim3 operator+(const dim3 &a, const dim3 &b); +inline dim3 operator-(const dim3 &a, const dim3 &b); } // syclcompat ``` @@ -103,39 +103,39 @@ addition to the global range, the following helper functions are also provided: namespace syclcompat { namespace local_id { -size_t x(); -size_t y(); -size_t z(); +inline size_t x(); +inline size_t y(); +inline size_t z(); } // namespace local_id namespace local_range { -size_t x(); -size_t y(); -size_t z(); +inline size_t x(); +inline size_t y(); +inline size_t z(); } // namespace local_range namespace work_group_id { -size_t x(); -size_t y(); -size_t z(); +inline size_t x(); +inline size_t y(); +inline size_t z(); } // namespace work_group_id namespace work_group_range { -size_t x(); -size_t y(); -size_t z(); +inline size_t x(); +inline size_t y(); +inline size_t z(); } // namespace work_group_range namespace global_range { -size_t x(); -size_t y(); -size_t z(); +inline size_t x(); +inline size_t y(); +inline size_t z(); } // namespace global_range namespace global_id { -size_t x(); -size_t y(); -size_t z(); +inline size_t x(); +inline size_t y(); +inline size_t z(); } // namespace global_id } // syclcompat @@ -298,12 +298,13 @@ group size in each dimension. ```c++ namespace syclcompat { -void wg_barrier(); +inline void wg_barrier(); template -sycl::nd_range compute_nd_range(sycl::range global_size_in, - sycl::range work_group_size); -sycl::nd_range<1> compute_nd_range(int global_size_in, int work_group_size); +inline sycl::nd_range compute_nd_range(sycl::range global_size_in, + sycl::range work_group_size); +inline sycl::nd_range<1> compute_nd_range(int global_size_in, + int work_group_size); } // syclcompat ``` @@ -320,8 +321,8 @@ out-of-order queue, either created manually or retrieved via a call to ```c++ namespace syclcompat { -sycl::queue create_queue(bool print_on_async_exceptions = false, - bool in_order = true); +inline sycl::queue create_queue(bool print_on_async_exceptions = false, + bool in_order = true); } // syclcompat ``` @@ -670,17 +671,18 @@ class device_info { public: const char *get_name(); char *get_name(); - template , + template , std::enable_if_t> || std::is_same_v, int> = 0> auto get_max_work_item_sizes() const; - template , + template , std::enable_if_t> || std::is_same_v, int> = 0> auto get_max_work_item_sizes() const; + bool get_host_unified_memory() const; int get_major_version() const; int get_minor_version() const; int get_integrated() const; @@ -689,6 +691,7 @@ public: int get_max_work_group_size() const; int get_max_sub_group_size() const; int get_max_work_items_per_compute_unit() const; + int get_max_register_size_per_work_group() const; template || std::is_same_v, @@ -702,8 +705,17 @@ public: size_t get_global_mem_size() const; size_t get_local_mem_size() const; -void set_name(const char *name); - void set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes); + unsigned int get_memory_clock_rate() const; + unsigned int get_memory_bus_width() const; + uint32_t get_device_id() const; + std::array get_uuid() const; + unsigned int get_global_mem_cache_size() const; + + void set_name(const char *name); + void set_max_work_item_sizes(const sycl::range<3> max_work_item_sizes); + [[deprecated]] void + set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes); + void set_host_unified_memory(bool host_unified_memory); void set_major_version(int major); void set_minor_version(int minor); void set_integrated(int integrated); @@ -716,6 +728,13 @@ void set_name(const char *name); void set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit); void set_max_nd_range_size(int max_nd_range_size[]); + void set_memory_clock_rate(unsigned int memory_clock_rate); + void set_memory_bus_width(unsigned int memory_bus_width); + void + set_max_register_size_per_work_group(int max_register_size_per_work_group); + void set_device_id(uint32_t device_id); + void set_uuid(std::array uuid); + void set_global_mem_cache_size(unsigned int global_mem_cache_size); }; ``` @@ -729,38 +748,38 @@ follows: namespace syclcompat { // Util function to create a new queue for the current device -sycl::queue create_queue(bool print_on_async_exceptions = false, - bool in_order = true); +static inline sycl::queue create_queue(bool print_on_async_exceptions = false, + bool in_order = true); // Util function to get the default queue of current device in // device manager. -sycl::queue get_default_queue(); +static inline sycl::queue get_default_queue(); // Util function to wait for the queued kernels. -void wait(sycl::queue q = get_default_queue()); +static inline void wait(sycl::queue q = get_default_queue()); // Util function to wait for the queued kernels and throw unhandled errors. -void wait_and_throw(sycl::queue q = get_default_queue()); +static inline void wait_and_throw(sycl::queue q = get_default_queue()); // Util function to get the id of current device in // device manager. -unsigned int get_current_device_id(); +static inline unsigned int get_current_device_id(); // Util function to get the current device. -device_ext &get_current_device(); +static inline device_ext &get_current_device(); // Util function to get a device by id. -device_ext &get_device(unsigned int id); +static inline device_ext &get_device(unsigned int id); // Util function to get the context of the default queue of current // device in device manager. -sycl::context get_default_context(); +static inline sycl::context get_default_context(); // Util function to get a CPU device. -device_ext &cpu_device(); +static inline device_ext &cpu_device(); // Util function to select a device by its id -unsigned int select_device(unsigned int id); +static inline unsigned int select_device(unsigned int id); } // syclcompat ``` @@ -778,6 +797,9 @@ destructor waits on a set of `sycl::event` which can be added to via `add_event`. This is used, for example, to implement `syclcompat::free_async` to schedule release of memory after a kernel or `mempcy`. SYCL device properties can be queried through `device_ext` as well. +`device_ext` also provides the `has_capability_or_fail` member function, which +throws a `std::runtime_error` if the device does not have the specified list of +`sycl::aspect`. The class is exposed as follows: @@ -786,19 +808,26 @@ namespace syclcompat { class device_ext : public sycl::device { device_ext(); - device_ext(const sycl::device &base); + device_ext(const sycl::device &base, bool print_on_async_exceptions = false, + bool in_order = true); ~device_ext(); bool is_native_host_atomic_supported(); - int get_major_version(); - int get_minor_version(); - int get_max_compute_units(); - int get_max_clock_frequency(); - int get_integrated(); - void get_device_info(device_info &out); + int get_major_version() const; + int get_minor_version() const; + int get_max_compute_units() const; + int get_max_clock_frequency() const; + int get_integrated() const; + int get_max_sub_group_size() const; + int get_max_register_size_per_work_group() const; + int get_max_work_group_size() const; + int get_mem_base_addr_align() const; + size_t get_global_mem_size() const; + void get_memory_info(size_t &free_memory, size_t &total_memory); - device_info get_device_info(); - void reset(); + void get_device_info(device_info &out) const; + device_info get_device_info() const; + void reset(bool print_on_async_exceptions = false, bool in_order = true); sycl::queue *default_queue(); void queues_wait_and_throw(); @@ -808,6 +837,12 @@ class device_ext : public sycl::device { void set_saved_queue(sycl::queue *q); sycl::queue *get_saved_queue(); sycl::context get_context(); + + // Util function to check whether a device supports some kinds of + // sycl::aspect. + // A std::runtime_error is thrown if the aspect is not supported + void + has_capability_or_fail(const std::initializer_list &props); }; } // syclcompat @@ -1114,23 +1149,52 @@ kernel names during machine translation. `get_sycl_language_version` returns an integer representing the version of the SYCL spec supported by the current SYCL compiler. +The `SYCLCOMPAT_CHECK_ERROR` macro encapsulates an error-handling mechanism for +expressions that might throw exceptions. If no exceptions are thrown, it returns +`syclcompat::error_code::SUCCESS`. If an exception is caught, it prints the +error message to the standard error stream and returns +`syclcompat::error_code::DEFAULT_ERROR`. + ``` c++ namespace syclcompat { -#define __sycl_compat_align__(n) __attribute__((aligned(n))) -#define __sycl_compat_inline__ __inline__ __attribute__((always_inline)) +template class syclcompat_kernel_name; +template class syclcompat_kernel_scalar; + +#if defined(_MSC_VER) +#define __syclcompat_align__(n) __declspec(align(n)) +#define __syclcompat_inline__ __forceinline +#else +#define __syclcompat_align__(n) __attribute__((aligned(n))) +#define __syclcompat_inline__ __inline__ __attribute__((always_inline)) +#endif + +#if defined(_MSC_VER) +#define __syclcompat_noinline__ __declspec(noinline) +#else +#define __syclcompat_noinline__ __attribute__((noinline)) +#endif -#define __sycl_compat_noinline__ __attribute__((noinline)) +#define SYCLCOMPAT_COMPATIBILITY_TEMP (600) -template class sycl_compat_kernel_name; -template class sycl_compat_kernel_scalar; +#ifdef _WIN32 +#define SYCLCOMPAT_EXPORT __declspec(dllexport) +#else +#define SYCLCOMPAT_EXPORT +#endif + +namespace syclcompat { +enum error_code { SUCCESS = 0, DEFAULT_ERROR = 999 }; +} + +#define SYCLCOMPAT_CHECK_ERROR(expr) int get_sycl_language_version(); } // namespace syclcompat ``` -#### Kernel Helper Functions +### Kernel Helper Functions Kernel helper functions provide a structure `kernel_function_info` to keep SYCL kernel information, and provide a utility function `get_kernel_function_info()` @@ -1138,6 +1202,11 @@ to get the kernel information. Overloads are provided to allow either returning a `kernel_function_info` object, or to return by pointer argument. In the current version, `kernel_function_info` describes only maximum work-group size. +SYCLcompat also provides the `kernel_library` and `kernel_function` classes. +`kernel_library` facilitates the loading and unloading of kernel libraries. +`kernel_function` represents a specific kernel function within a loaded librariy +and can be invoked with specified arguments. + ``` c++ namespace syclcompat { @@ -1148,6 +1217,34 @@ struct kernel_function_info { static void get_kernel_function_info(kernel_function_info *kernel_info, const void *function); static kernel_function_info get_kernel_function_info(const void *function); + +class kernel_library { + kernel_library(); + kernel_library(void *ptr); + operator void *() const; +}; + +static kernel_library load_kernel_library(const std::string &name); +static kernel_library load_kernel_library_mem(char const *const image); +static void unload_kernel_library(const kernel_library &library); + +class kernel_function { + kernel_function(); + kernel_function(kernel_functor ptr); + operator void *() const; + void operator()(sycl::queue &q, const sycl::nd_range<3> &range, + unsigned int a, void **args, void **extra); +}; + +static kernel_function get_kernel_function(kernel_library &library, + const std::string &name); +static void invoke_kernel_function(kernel_function &function, + sycl::queue &queue, + sycl::range<3> groupRange, + sycl::range<3> localRange, + unsigned int localMemSize, + void **kernelParams, void **extra); + } // namespace syclcompat ``` diff --git a/sycl/include/syclcompat/defs.hpp b/sycl/include/syclcompat/defs.hpp index 6e4d76cf99403..34ffa75647277 100644 --- a/sycl/include/syclcompat/defs.hpp +++ b/sycl/include/syclcompat/defs.hpp @@ -32,12 +32,42 @@ #pragma once -template class sycl_compat_kernel_name; -template class sycl_compat_kernel_scalar; +template class syclcompat_kernel_name; +template class syclcompat_kernel_scalar; -#define __sycl_compat_align__(n) alignas(n) -#define __sycl_compat_inline__ __inline__ __attribute__((always_inline)) +#if defined(_MSC_VER) +#define __syclcompat_align__(n) __declspec(align(n)) +#define __syclcompat_inline__ __forceinline +#else +#define __syclcompat_align__(n) __attribute__((aligned(n))) +#define __syclcompat_inline__ __inline__ __attribute__((always_inline)) +#endif -#define __sycl_compat_noinline__ __attribute__((noinline)) +#if defined(_MSC_VER) +#define __syclcompat_noinline__ __declspec(noinline) +#else +#define __syclcompat_noinline__ __attribute__((noinline)) +#endif -#define SYCL_COMPAT_COMPATIBILITY_TEMP (600) +#define SYCLCOMPAT_COMPATIBILITY_TEMP (600) + +#ifdef _WIN32 +#define SYCLCOMPAT_EXPORT __declspec(dllexport) +#else +#define SYCLCOMPAT_EXPORT +#endif + +namespace syclcompat { +enum error_code { SUCCESS = 0, DEFAULT_ERROR = 999 }; +} + +#define SYCLCOMPAT_CHECK_ERROR(expr) \ + [&]() { \ + try { \ + expr; \ + return syclcompat::error_code::SUCCESS; \ + } catch (std::exception const &e) { \ + std::cerr << e.what() << std::endl; \ + return syclcompat::error_code::DEFAULT_ERROR; \ + } \ + }() diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp index be02a42aff4cf..2eab51d6deff6 100644 --- a/sycl/include/syclcompat/device.hpp +++ b/sycl/include/syclcompat/device.hpp @@ -45,7 +45,9 @@ #include #endif #if defined(_WIN64) +#ifndef NOMINMAX #define NOMINMAX +#endif #include #endif @@ -60,7 +62,7 @@ namespace syclcompat { namespace detail { /// SYCL default exception handler -auto exception_handler = [](sycl::exception_list exceptions) { +inline auto exception_handler = [](sycl::exception_list exceptions) { for (std::exception_ptr const &e : exceptions) { try { std::rethrow_exception(e); @@ -80,6 +82,8 @@ using event_ptr = sycl::event *; using queue_ptr = sycl::queue *; +using device_ptr = char *; + /// Destroy \p event pointed memory. /// /// \param event Pointer to the sycl::event address. @@ -90,26 +94,27 @@ class device_info { // get interface const char *get_name() const { return _name; } char *get_name() { return _name; } - template , - std::enable_if_t> || + template , + std::enable_if_t> || std::is_same_v, int> = 0> auto get_max_work_item_sizes() const { - if constexpr (std::is_same_v>) + if constexpr (std::is_same_v>) return _max_work_item_sizes; else return _max_work_item_sizes_i; } - template , - std::enable_if_t> || + template , + std::enable_if_t> || std::is_same_v, int> = 0> auto get_max_work_item_sizes() { - if constexpr (std::is_same_v>) + if constexpr (std::is_same_v>) return _max_work_item_sizes; else return _max_work_item_sizes_i; } + bool get_host_unified_memory() const { return _host_unified_memory; } int get_major_version() const { return _major; } int get_minor_version() const { return _minor; } int get_integrated() const { return _integrated; } @@ -120,6 +125,9 @@ class device_info { int get_max_work_items_per_compute_unit() const { return _max_work_items_per_compute_unit; } + int get_max_register_size_per_work_group() const { + return _max_register_size_per_work_group; + } template || std::is_same_v, @@ -142,15 +150,44 @@ class device_info { } size_t get_global_mem_size() const { return _global_mem_size; } size_t get_local_mem_size() const { return _local_mem_size; } + /// Returns the maximum clock rate of device's global memory in kHz. If + /// compiler does not support this API then returns default value 3200000 kHz. + unsigned int get_memory_clock_rate() const { return _memory_clock_rate; } + /// Returns the maximum bus width between device and memory in bits. If + /// compiler does not support this API then returns default value 64 bits. + unsigned int get_memory_bus_width() const { return _memory_bus_width; } + uint32_t get_device_id() const { return _device_id; } + std::array get_uuid() const { return _uuid; } + /// Returns global memory cache size in bytes. + unsigned int get_global_mem_cache_size() const { + return _global_mem_cache_size; + } + // set interface void set_name(const char *name) { - std::strncpy(_name, name, device_info::NAME_BUFFER_SIZE); + size_t length = strlen(name); + if (length < device_info::NAME_BUFFER_SIZE) { + std::memcpy(_name, name, length + 1); + } else { + std::memcpy(_name, name, device_info::NAME_BUFFER_SIZE - 1); + _name[255] = '\0'; + } } - void set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes) { + void set_max_work_item_sizes(const sycl::range<3> max_work_item_sizes) { _max_work_item_sizes = max_work_item_sizes; for (int i = 0; i < 3; ++i) _max_work_item_sizes_i[i] = max_work_item_sizes[i]; } + [[deprecated]] void + set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes) { + for (int i = 0; i < 3; ++i) { + _max_work_item_sizes[i] = max_work_item_sizes[i]; + _max_work_item_sizes_i[i] = max_work_item_sizes[i]; + } + } + void set_host_unified_memory(bool host_unified_memory) { + _host_unified_memory = host_unified_memory; + } void set_major_version(int major) { _major = major; } void set_minor_version(int minor) { _minor = minor; } void set_integrated(int integrated) { _integrated = integrated; } @@ -180,25 +217,49 @@ class device_info { _max_nd_range_size_i[i] = max_nd_range_size[i]; } } + void set_memory_clock_rate(unsigned int memory_clock_rate) { + _memory_clock_rate = memory_clock_rate; + } + void set_memory_bus_width(unsigned int memory_bus_width) { + _memory_bus_width = memory_bus_width; + } + void + set_max_register_size_per_work_group(int max_register_size_per_work_group) { + _max_register_size_per_work_group = max_register_size_per_work_group; + } + void set_device_id(uint32_t device_id) { _device_id = device_id; } + void set_uuid(std::array uuid) { _uuid = std::move(uuid); } + void set_global_mem_cache_size(unsigned int global_mem_cache_size) { + _global_mem_cache_size = global_mem_cache_size; + } private: constexpr static size_t NAME_BUFFER_SIZE = 256; char _name[device_info::NAME_BUFFER_SIZE]; - sycl::id<3> _max_work_item_sizes; + sycl::range<3> _max_work_item_sizes; int _max_work_item_sizes_i[3]; + bool _host_unified_memory = false; int _major; int _minor; int _integrated = 0; int _frequency; + // Set estimated value 3200000 kHz as default value. + unsigned int _memory_clock_rate = 3200000; + // Set estimated value 64 bits as default value. + unsigned int _memory_bus_width = 64; + unsigned int _global_mem_cache_size; int _max_compute_units; int _max_work_group_size; int _max_sub_group_size; int _max_work_items_per_compute_unit; + int _max_register_size_per_work_group; size_t _global_mem_size; size_t _local_mem_size; size_t _max_nd_range_size[3]; int _max_nd_range_size_i[3]; + uint32_t _device_id; + std::array _uuid; }; /// device extension @@ -210,15 +271,16 @@ class device_ext : public sycl::device { sycl::event::wait(_events); _queues.clear(); } - device_ext(const sycl::device &base) : sycl::device(base), _ctx(*this) { + device_ext(const sycl::device &base, bool print_on_async_exceptions = false, + bool in_order = true) + : sycl::device(base), _ctx(*this) { if (!this->has(sycl::aspect::usm_device_allocations)) { throw std::invalid_argument( "Device does not support device USM allocations"); } - _queues.push_back( - std::make_shared(_ctx, base, detail::exception_handler, - sycl::property::queue::in_order())); - _saved_queue = _default_queue = _queues[0].get(); + // calls create_queue since we haven't lock(m_mutex); + _saved_queue = _default_queue = + create_queue(print_on_async_exceptions, in_order); } bool is_native_host_atomic_supported() { return false; } @@ -234,12 +296,60 @@ class device_ext : public sycl::device { return get_device_info().get_max_compute_units(); } + /// Return the maximum clock frequency of this device in KHz. int get_max_clock_frequency() const { return get_device_info().get_max_clock_frequency(); } int get_integrated() const { return get_device_info().get_integrated(); } + int get_max_sub_group_size() const { + return get_device_info().get_max_sub_group_size(); + } + + int get_max_register_size_per_work_group() const { + return get_device_info().get_max_register_size_per_work_group(); + } + + int get_max_work_group_size() const { + return get_device_info().get_max_work_group_size(); + } + + int get_mem_base_addr_align() const { + return get_info(); + } + + size_t get_global_mem_size() const { + return get_device_info().get_global_mem_size(); + } + + /// Get the number of bytes of free and total memory on the SYCL device. + /// \param [out] free_memory The number of bytes of free memory on the SYCL + /// device. + /// \param [out] total_memory The number of bytes of total memory on the SYCL + /// device. + void get_memory_info(size_t &free_memory, size_t &total_memory) { +#if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105) + if (!has(sycl::aspect::ext_intel_free_memory)) { + std::cerr << "get_memory_info: ext_intel_free_memory is not supported." + << std::endl; + free_memory = 0; + } else { + free_memory = get_info(); + } +#else + std::cerr << "get_memory_info: ext_intel_free_memory is not supported." + << std::endl; + free_memory = 0; +#if defined(_MSC_VER) && !defined(__clang__) +#pragma message("Querying the number of bytes of free memory is not supported") +#else +#warning "Querying the number of bytes of free memory is not supported" +#endif +#endif + total_memory = get_device_info().get_global_mem_size(); + } + void get_device_info(device_info &out) const { device_info prop; prop.set_name(get_info().c_str()); @@ -269,6 +379,35 @@ class device_ext : public sycl::device { prop.set_global_mem_size(get_info()); prop.set_local_mem_size(get_info()); +#if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6) + if (has(sycl::aspect::ext_intel_memory_clock_rate)) { + unsigned int tmp = + get_info(); + if (tmp != 0) + prop.set_memory_clock_rate(1000 * tmp); + } + if (has(sycl::aspect::ext_intel_memory_bus_width)) { + prop.set_memory_bus_width( + get_info()); + } + if (has(sycl::aspect::ext_intel_device_id)) { + prop.set_device_id(get_info()); + } + if (has(sycl::aspect::ext_intel_device_info_uuid)) { + prop.set_uuid(get_info()); + } +#elif defined(_MSC_VER) && !defined(__clang__) +#pragma message("get_device_info: querying memory_clock_rate and \ +memory_bus_width are not supported by the compiler used. \ +Use 3200000 kHz as memory_clock_rate default value. \ +Use 64 bits as memory_bus_width default value.") +#else +#warning "get_device_info: querying memory_clock_rate and \ +memory_bus_width are not supported by the compiler used. \ +Use 3200000 kHz as memory_clock_rate default value. \ +Use 64 bits as memory_bus_width default value." +#endif + size_t max_sub_group_size = 1; std::vector sub_group_sizes = get_info(); @@ -285,6 +424,12 @@ class device_ext : public sycl::device { int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; prop.set_max_nd_range_size(max_nd_range_size); + // Estimates max register size per work group, feel free to update the value + // according to device properties. + prop.set_max_register_size_per_work_group(65536); + + prop.set_global_mem_cache_size( + get_info()); out = prop; } @@ -294,7 +439,7 @@ class device_ext : public sycl::device { return prop; } - void reset() { + void reset(bool print_on_async_exceptions = false, bool in_order = true) { std::lock_guard lock(m_mutex); // The queues are shared_ptrs and the ref counts of the shared_ptrs increase // only in wait_and_throw(). If there is no other thread calling @@ -302,11 +447,10 @@ class device_ext : public sycl::device { // all commands executing on the queue to complete. It isn't possible to // destroy a queue immediately. This is a synchronization point in SYCL. _queues.clear(); - // create new default queue. - _queues.push_back( - std::make_shared(_ctx, *this, detail::exception_handler, - sycl::property::queue::in_order())); - _saved_queue = _default_queue = _queues.front().get(); + // create new default queue + // calls create_queue_impl since we already lock(m_mutex); + _saved_queue = _default_queue = + create_queue_impl(print_on_async_exceptions, in_order); } sycl::queue *default_queue() { return _default_queue; } @@ -321,22 +465,12 @@ class device_ext : public sycl::device { // Guard the destruct of current_queues to make sure the ref count is safe. lock.lock(); } - queue_ptr create_queue(bool print_on_async_exceptions = false, - bool in_order = true) { + sycl::queue *create_queue(bool print_on_async_exceptions = false, + bool in_order = true) { std::lock_guard lock(m_mutex); - sycl::property_list prop = {}; - if (in_order) { - prop = {sycl::property::queue::in_order()}; - } - if (print_on_async_exceptions) { - _queues.push_back(std::make_shared( - _ctx, *this, detail::exception_handler, prop)); - } else { - _queues.push_back(std::make_shared(_ctx, *this, prop)); - } - return _queues.back().get(); + return create_queue_impl(print_on_async_exceptions, in_order); } - void destroy_queue(queue_ptr &queue) { + void destroy_queue(sycl::queue *&queue) { std::lock_guard lock(m_mutex); _queues.erase( std::remove_if(_queues.begin(), _queues.end(), @@ -346,17 +480,81 @@ class device_ext : public sycl::device { _queues.end()); queue = nullptr; } - void set_saved_queue(queue_ptr q) { + void set_saved_queue(sycl::queue *q) { std::lock_guard lock(m_mutex); _saved_queue = q; } - queue_ptr get_saved_queue() const { + sycl::queue *get_saved_queue() const { std::lock_guard lock(m_mutex); return _saved_queue; } sycl::context get_context() const { return _ctx; } + /// Util function to check whether a device supports some kinds of + /// sycl::aspect. + void + has_capability_or_fail(const std::initializer_list &props) { + for (const auto &it : props) { + if (has(it)) + continue; + switch (it) { + case sycl::aspect::fp64: + throw std::runtime_error("[SYCLcompat] 'double' is not supported in '" + + get_info() + + "' device"); + break; + case sycl::aspect::fp16: + throw std::runtime_error("[SYCLcompat] 'half' is not supported in '" + + get_info() + + "' device"); + break; + default: +#define __SYCL_ASPECT(ASPECT, ID) \ + case sycl::aspect::ASPECT: \ + return #ASPECT; +#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID) +#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE) + auto getAspectNameStr = [](sycl::aspect AspectNum) -> std::string { + switch (AspectNum) { +#include +#include + default: + return "unknown aspect"; + } + }; +#undef __SYCL_ASPECT_DEPRECATED_ALIAS +#undef __SYCL_ASPECT_DEPRECATED +#undef __SYCL_ASPECT + throw std::runtime_error("[SYCLcompat] '" + getAspectNameStr(it) + + "' is not supported in '" + + get_info() + + "' device"); + } + break; + } + } + private: + /// Caller should only be done from functions where the resource \p m_mutex + /// has been acquired. + queue_ptr create_queue_impl(bool print_on_async_exceptions = false, + bool in_order = true) { + sycl::property_list prop = {}; + if (in_order) { + prop = {sycl::property::queue::in_order()}; + } +#ifdef SYCLCOMPAT_PROFILING_ENABLED + prop.push_back(sycl::property::queue::enable_profiling()); +#endif + if (print_on_async_exceptions) { + _queues.push_back(std::make_shared( + _ctx, *this, detail::exception_handler, prop)); + } else { + _queues.push_back(std::make_shared(_ctx, *this, prop)); + } + return _queues.back().get(); + } + void get_version(int &major, int &minor) const { // Version string has the following format: // a. OpenCL @@ -391,6 +589,7 @@ class device_ext : public sycl::device { } friend sycl::event free_async(const std::vector &, const std::vector &, sycl::queue); + queue_ptr _default_queue; queue_ptr _saved_queue; sycl::context _ctx; @@ -420,7 +619,7 @@ class dev_mgr { return *_devs[dev_id]; } device_ext &cpu_device() const { - std::lock_guard lock(m_mutex); + std::lock_guard lock(m_mutex); if (_cpu_device == -1) { throw std::runtime_error("[SYCLcompat] No valid cpu device"); } else { @@ -428,19 +627,23 @@ class dev_mgr { } } device_ext &get_device(unsigned int id) const { - std::lock_guard lock(m_mutex); + std::lock_guard lock(m_mutex); check_id(id); return *_devs[id]; } unsigned int current_device_id() const { - std::lock_guard lock(m_mutex); + std::lock_guard lock(m_mutex); auto it = _thread2dev_map.find(get_tid()); if (it != _thread2dev_map.end()) return it->second; return _default_device_id; } + + /// Select device with a device ID. + /// \param [in] id The id of the device which can + /// be obtained through get_device_id(const sycl::device). void select_device(unsigned int id) { - std::lock_guard lock(m_mutex); + std::lock_guard lock(m_mutex); check_id(id); _thread2dev_map[get_tid()] = id; } @@ -457,6 +660,15 @@ class dev_mgr { return id; } + template + std::enable_if_t< + std::is_invocable_r_v> + select_device(const DeviceSelector &selector = sycl::gpu_selector_v) { + sycl::device selected_device = sycl::device(selector); + unsigned int selected_device_id = get_device_id(selected_device); + select_device(selected_device_id); + } + /// Returns the instance of device manager singleton. static dev_mgr &instance() { static dev_mgr d_m; @@ -468,7 +680,7 @@ class dev_mgr { dev_mgr &operator=(dev_mgr &&) = delete; private: - mutable std::mutex m_mutex; + mutable std::recursive_mutex m_mutex; dev_mgr() { sycl::device default_device = sycl::device(sycl::default_selector_v); _devs.push_back(std::make_shared(default_device)); @@ -505,8 +717,8 @@ class dev_mgr { } // namespace detail -inline sycl::queue create_queue(bool print_on_async_exceptions = false, - bool in_order = true) { +static inline sycl::queue create_queue(bool print_on_async_exceptions = false, + bool in_order = true) { return *detail::dev_mgr::instance().current_device().create_queue( print_on_async_exceptions, in_order); } @@ -517,7 +729,11 @@ static inline sycl::queue get_default_queue() { return *detail::dev_mgr::instance().current_device().default_queue(); } -inline void wait(sycl::queue q = get_default_queue()) { q.wait(); } +static inline void wait(sycl::queue q = get_default_queue()) { q.wait(); } + +static inline void wait_and_throw(sycl::queue q = get_default_queue()) { + q.wait_and_throw(); +} /// Util function to get the id of current device in /// device manager. @@ -551,4 +767,15 @@ static inline unsigned int select_device(unsigned int id) { return id; } +template +static inline std::enable_if_t< + std::is_invocable_r_v> +select_device(const DeviceSelector &selector = sycl::gpu_selector_v) { + detail::dev_mgr::instance().select_device(selector); +} + +static inline unsigned int get_device_id(const sycl::device &dev) { + return detail::dev_mgr::instance().get_device_id(dev); +} + } // namespace syclcompat diff --git a/sycl/include/syclcompat/kernel.hpp b/sycl/include/syclcompat/kernel.hpp index 0132e98e43841..0083fe1d9f993 100644 --- a/sycl/include/syclcompat/kernel.hpp +++ b/sycl/include/syclcompat/kernel.hpp @@ -35,6 +35,25 @@ #include #include +#ifdef _WIN32 +#include +#include +#else +#include +#endif + +#if defined(__has_include) && __has_include() +#include +#elif defined(__has_include) && __has_include() +#include +#else +#error "SYCLomatic runtime requires C++ filesystem support" +#endif + +#include +#include +#include + namespace syclcompat { typedef void (*kernel_functor)(sycl::queue &, const sycl::nd_range<3> &, @@ -44,14 +63,15 @@ struct kernel_function_info { int max_work_group_size = 0; }; -static void get_kernel_function_info(kernel_function_info *kernel_info, - const void *function) { +static inline void get_kernel_function_info(kernel_function_info *kernel_info, + const void *function) { kernel_info->max_work_group_size = detail::dev_mgr::instance() .current_device() .get_info(); } -static kernel_function_info get_kernel_function_info(const void *function) { +static inline kernel_function_info +get_kernel_function_info(const void *function) { kernel_function_info kernel_info; kernel_info.max_work_group_size = detail::dev_mgr::instance() @@ -60,4 +80,389 @@ static kernel_function_info get_kernel_function_info(const void *function) { return kernel_info; } +namespace detail { + +#if defined(__has_include) && __has_include() +namespace fs = std::filesystem; +#else +namespace fs = std::experimental::filesystem; +#endif + +/// Write data to temporary file and return absolute path to temporary file. +/// Temporary file is created in a temporary directory both of which have random +/// names with only the user having access permissions. Only one temporary file +/// will be created in the temporary directory. +static inline fs::path write_data_to_file(char const *const data, size_t size) { + std::error_code ec; + + if (sizeof(size_t) >= sizeof(std::streamsize) && + size > (std::numeric_limits::max)()) + throw std::runtime_error("[SYCLcompat] data file too large"); + + // random number generator + std::random_device dev; + std::mt19937 prng(dev()); + std::uniform_int_distribution rand(0); + + // find temporary directory + auto tmp_dir = fs::temp_directory_path(ec); + if (ec) + throw std::runtime_error("[SYCLcompat] could not find temporary directory"); + + // create private directory + std::stringstream directory; + fs::path directory_path; + constexpr int max_attempts = 5; + int i; + + for (i = 0; i < max_attempts; i++) { + directory << std::hex << rand(prng); + directory_path = tmp_dir / directory.str(); + if (fs::create_directory(directory_path)) { + break; + } + } + if (i == max_attempts) + throw std::runtime_error("[SYCLcompat] could not create directory"); + + // only allow owner permissions to private directory + fs::permissions(directory_path, fs::perms::owner_all, ec); + if (ec) + throw std::runtime_error( + "[SYCLcompat] could not set directory permissions"); + + // random filename in private directory + std::stringstream filename; + filename << std::hex << rand(prng); +#ifdef _WIN32 + auto filepath = directory_path / (filename.str() + ".dll"); +#else + auto filepath = directory_path / filename.str(); +#endif + + // write data to temporary file + auto outfile = std::ofstream(filepath, std::ios::out | std::ios::binary); + if (outfile) { + // only allow program to write file + fs::permissions(filepath, fs::perms::owner_write, ec); + if (ec) + throw std::runtime_error("[SYCLcompat] could not set permissions"); + + outfile.write(data, size); + if (!outfile.good()) + throw std::runtime_error("[SYCLcompat] could not write data"); + outfile.close(); + + // only allow program to read/execute file + fs::permissions(filepath, fs::perms::owner_read | fs::perms::owner_exec, + ec); + if (ec) + throw std::runtime_error("[SYCLcompat] could not set permissions"); + } else + throw std::runtime_error("[SYCLcompat] could not write data"); + + // check temporary file contents + auto infile = std::ifstream(filepath, std::ios::in | std::ios::binary); + if (infile) { + bool mismatch = false; + size_t cnt = 0; + + while (1) { + char c; + infile.get(c); + if (infile.eof()) + break; + if (c != data[cnt++]) + mismatch = true; + } + if (cnt != size || mismatch) + throw std::runtime_error( + "[SYCLcompat] file contents not written correctly"); + } else + throw std::runtime_error("[SYCLcompat] could not validate file"); + + if (!filepath.is_absolute()) + throw std::runtime_error("[SYCLcompat] temporary filepath is not absolute"); + + return filepath; +} + +static inline uint16_t extract16(unsigned char const *const ptr) { + uint16_t ret = 0; + + ret |= static_cast(ptr[0]) << 0; + ret |= static_cast(ptr[1]) << 8; + + return (ret); +} + +static inline uint32_t extract32(unsigned char const *const ptr) { + uint32_t ret = 0; + + ret |= static_cast(ptr[0]) << 0; + ret |= static_cast(ptr[1]) << 8; + ret |= static_cast(ptr[2]) << 16; + ret |= static_cast(ptr[3]) << 24; + + return (ret); +} + +static inline uint64_t extract64(unsigned char const *const ptr) { + uint64_t ret = 0; + + ret |= static_cast(ptr[0]) << 0; + ret |= static_cast(ptr[1]) << 8; + ret |= static_cast(ptr[2]) << 16; + ret |= static_cast(ptr[3]) << 24; + ret |= static_cast(ptr[4]) << 32; + ret |= static_cast(ptr[5]) << 40; + ret |= static_cast(ptr[6]) << 48; + ret |= static_cast(ptr[7]) << 56; + + return (ret); +} + +static inline uint64_t get_lib_size(char const *const blob) { +#ifdef _WIN32 + /////////////////////////////////////////////////////////////////////// + // Analyze DOS stub + unsigned char const *const ublob = + reinterpret_cast(blob); + if (ublob[0] != 0x4d || ublob[1] != 0x5a) { + throw std::runtime_error("[SYCLcompat] blob is not a Windows DLL."); + } + uint32_t pe_header_offset = extract32(ublob + 0x3c); + + /////////////////////////////////////////////////////////////////////// + // Ananlyze PE-header + unsigned char const *const pe_header = ublob + pe_header_offset; + + // signature + uint32_t pe_signature = extract32(pe_header + 0); + if (pe_signature != 0x00004550) { + throw std::runtime_error( + "[SYCLcompat] PE-header signature is not 0x00004550"); + } + + // machine + uint16_t machine = extract16(pe_header + 4); + if (machine != 0x8664) { + throw std::runtime_error("[SYCLcompat] only DLLs for x64 supported"); + } + + // number of sections + uint16_t number_of_sections = extract16(pe_header + 6); + + // sizeof optional header + uint16_t sizeof_optional_header = extract16(pe_header + 20); + + // magic + uint16_t magic = extract16(pe_header + 24); + if (magic != 0x10b && magic != 0x20b) { + throw std::runtime_error("[SYCLcompat] MAGIC is not 0x010b or 0x020b"); + } + + /////////////////////////////////////////////////////////////////////// + // Analyze tail of optional header + constexpr int coff_header_size = 24; + + unsigned char const *const tail_of_optional_header = + pe_header + coff_header_size + sizeof_optional_header; + if (extract64(tail_of_optional_header - 8) != 0) { + throw std::runtime_error("Optional header not zero-padded"); + } + + /////////////////////////////////////////////////////////////////////// + // Analyze last section header + constexpr int section_header_size = 40; + unsigned char const *const last_section_header = + tail_of_optional_header + section_header_size * (number_of_sections - 1); + + uint32_t sizeof_raw_data = extract32(last_section_header + 16); + uint32_t pointer_to_raw_data = extract32(last_section_header + 20); + + return sizeof_raw_data + pointer_to_raw_data; +#else + if (blob[0] != 0x7F || blob[1] != 'E' || blob[2] != 'L' || blob[3] != 'F') + throw std::runtime_error("[SYCLcompat] blob is not in ELF format"); + + if (blob[4] != 0x02) + throw std::runtime_error("[SYCLcompat] only 64-bit headers are supported"); + + if (blob[5] != 0x01) + throw std::runtime_error( + "[SYCLcompat] only little-endian headers are supported"); + + unsigned char const *const ublob = + reinterpret_cast(blob); + uint64_t e_shoff = extract64(ublob + 0x28); + uint16_t e_shentsize = extract16(ublob + 0x3A); + uint16_t e_shnum = extract16(ublob + 0x3C); + + return e_shoff + (e_shentsize * e_shnum); +#endif +} + +#ifdef _WIN32 +class path_lib_record { +public: + void operator=(const path_lib_record &) = delete; + ~path_lib_record() { + for (auto entry : lib_to_path) { + FreeLibrary(static_cast(entry.first)); + fs::permissions(entry.second, fs::perms::owner_all); + fs::remove_all(entry.second.remove_filename()); + } + } + static void record_lib_path(fs::path path, void *library) { + lib_to_path[library] = path; + } + static void remove_lib(void *library) { + auto path = lib_to_path[library]; + std::error_code ec; + + FreeLibrary(static_cast(library)); + fs::permissions(path, fs::perms::owner_all); + if (fs::remove_all(path.remove_filename(), ec) != 2 || ec) + // one directory and one temporary file should have been deleted + throw std::runtime_error("[SYCLcompat] directory delete failed"); + + lib_to_path.erase(library); + } + +private: + static inline std::unordered_map lib_to_path; +}; +#endif + +} // namespace detail + +class kernel_library { +public: + kernel_library() : ptr{nullptr} {} + kernel_library(void *ptr) : ptr{ptr} {} + + operator void *() const { return ptr; } + +private: + void *ptr; +#ifdef _WIN32 + static inline detail::path_lib_record single_instance_to_trigger_destructor; +#endif +}; + +namespace detail { + +static inline kernel_library load_dl_from_data(char const *const data, + size_t size) { + fs::path filename = write_data_to_file(data, size); +#ifdef _WIN32 + void *so = LoadLibraryW(filename.wstring().c_str()); +#else + void *so = dlopen(filename.c_str(), RTLD_LAZY); +#endif + if (so == nullptr) + throw std::runtime_error("[SYCLcompat] failed to load kernel library"); + +#ifdef _WIN32 + detail::path_lib_record::record_lib_path(filename, so); +#else + std::error_code ec; + + // Windows DLL cannot be deleted while in use + if (fs::remove_all(filename.remove_filename(), ec) != 2 || ec) + // one directory and one temporary file should have been deleted + throw std::runtime_error("[SYCLcompat] directory delete failed"); +#endif + + return so; +} + +} // namespace detail + +/// Load kernel library and return a handle to use the library. +/// \param [in] name The name of the library. +static inline kernel_library load_kernel_library(const std::string &name) { + std::ifstream ifs; + ifs.open(name, std::ios::in | std::ios::binary); + + std::stringstream buffer; + buffer << ifs.rdbuf(); + + const std::string buffer_string = buffer.str(); + return detail::load_dl_from_data(buffer_string.c_str(), buffer_string.size()); +} + +/// Load kernel library whose image is alreay in memory and return a handle to +/// use the library. +/// \param [in] image A pointer to the image in memory. +static inline kernel_library load_kernel_library_mem(char const *const image) { + const size_t size = detail::get_lib_size(image); + + return detail::load_dl_from_data(image, size); +} + +/// Unload kernel library. +/// \param [in,out] library Handle to the library to be closed. +static inline void unload_kernel_library(const kernel_library &library) { +#ifdef _WIN32 + detail::path_lib_record::remove_lib(library); +#else + dlclose(library); +#endif +} + +class kernel_function { +public: + kernel_function() : ptr{nullptr} {} + kernel_function(kernel_functor ptr) : ptr{ptr} {} + + operator void *() const { return ((void *)ptr); } + + void operator()(sycl::queue &q, const sycl::nd_range<3> &range, + unsigned int a, void **args, void **extra) { + ptr(q, range, a, args, extra); + } + +private: + kernel_functor ptr; +}; + +/// Find kernel function in a kernel library and return its address. +/// \param [in] library Handle to the kernel library. +/// \param [in] name Name of the kernel function. +static inline kernel_function get_kernel_function(kernel_library &library, + const std::string &name) { +#ifdef _WIN32 + kernel_functor fn = reinterpret_cast( + GetProcAddress(static_cast(static_cast(library)), + (name + std::string("_wrapper")).c_str())); +#else + kernel_functor fn = reinterpret_cast( + dlsym(library, (name + std::string("_wrapper")).c_str())); +#endif + if (fn == nullptr) + throw std::runtime_error("[SYCLcompat] failed to get function"); + return fn; +} + +/// Invoke a kernel function. +/// \param [in] function kernel function. +/// \param [in] queue SYCL queue used to execute kernel +/// \param [in] groupRange SYCL group range +/// \param [in] localRange SYCL local range +/// \param [in] localMemSize The size of local memory required by the kernel +/// function. +/// \param [in] kernelParams Array of pointers to kernel arguments. +/// \param [in] extra Extra arguments. +static inline void invoke_kernel_function(kernel_function &function, + sycl::queue &queue, + sycl::range<3> groupRange, + sycl::range<3> localRange, + unsigned int localMemSize, + void **kernelParams, void **extra) { + function(queue, sycl::nd_range<3>(groupRange * localRange, localRange), + localMemSize, kernelParams, extra); +} + } // namespace syclcompat diff --git a/sycl/test-e2e/syclcompat/defs.cpp b/sycl/test-e2e/syclcompat/defs.cpp index 01681d13610f7..3d66331804599 100644 --- a/sycl/test-e2e/syclcompat/defs.cpp +++ b/sycl/test-e2e/syclcompat/defs.cpp @@ -17,22 +17,40 @@ * Defs.cpp * * Description: - * __sycl_compat_align__ tests + * __syclcompat_align__ tests **************************************************************************/ // RUN: %clangxx -fsycl %s -o %t.out // RUN: %{run} %t.out #include +#include #include -int main() { - struct __sycl_compat_align__(16) { +void test_align() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + constexpr std::size_t expected_size = 16; + struct __syclcompat_align__(expected_size) { int a; char c; } s; - assert(sizeof(s) == 16); + assert(sizeof(s) == expected_size); +} + +void test_check_error() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + assert(syclcompat::error_code::SUCCESS == SYCLCOMPAT_CHECK_ERROR(0)); + assert(syclcompat::error_code::DEFAULT_ERROR == + SYCLCOMPAT_CHECK_ERROR(throw std::runtime_error( + "Expected exception in test_check_error"))); +} + +int main() { + test_align(); + test_check_error(); return 0; } diff --git a/sycl/test-e2e/syclcompat/device/device.cpp b/sycl/test-e2e/syclcompat/device/device.cpp index c807340301586..521b0b4957365 100644 --- a/sycl/test-e2e/syclcompat/device/device.cpp +++ b/sycl/test-e2e/syclcompat/device/device.cpp @@ -36,156 +36,200 @@ #include "device_fixt.hpp" -int main() { - /* - Device Tests - */ - std::cout << "Testing AtLeastOneDevice" << std::endl; - { - DeviceTestsFixt dtf; - assert(dtf.get_n_devices() > 0); - } +/* + Device Tests +*/ +void test_at_least_one_device() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + DeviceTestsFixt dtf; + assert(dtf.get_n_devices() > 0); +} - // Check the device returned matches the device ID - std::cout << "Testing MatchesID" << std::endl; - { - assert(syclcompat::get_device(syclcompat::get_current_device_id()) == - syclcompat::get_current_device()); - } +// Check the device returned matches the device ID +void test_matches_id() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + assert(syclcompat::get_device(syclcompat::get_current_device_id()) == + syclcompat::get_current_device()); +} - // Check error on insufficient devices - std::cout << "Testing NotEnoughDevices" << std::endl; - { - DeviceTestsFixt dtf; - try { - syclcompat::select_device(dtf.get_n_devices()); - } catch (std::runtime_error const &e) { - std::cout << "Expected SYCL exception caught: " << e.what(); - } +// Check error on insufficient devices +void test_not_enough_devices() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + DeviceTestsFixt dtf; + try { + syclcompat::select_device(dtf.get_n_devices()); + } catch (std::runtime_error const &e) { + std::cout << "Expected SYCL exception caught: " << e.what(); } +} - // Check the default context matches default queue's context - std::cout << "Testing DefaultContext" << std::endl; - { - DeviceTestsFixt dtf; - assert(dtf.get_queue().get_context() == syclcompat::get_default_context()); - } +// Check the default context matches default queue's context +void test_default_context() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + DeviceTestsFixt dtf; + assert(dtf.get_queue().get_context() == syclcompat::get_default_context()); +} - /* - Queue Tests - */ - std::cout << "Testing MakeInOrderQueue" << std::endl; - { - sycl::queue q = syclcompat::get_default_queue(); - assert(q.is_in_order()); - } +/* + Queue Tests +*/ +void test_make_in_order_queue() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + sycl::queue q = syclcompat::get_default_queue(); + assert(q.is_in_order()); +} - std::cout << "Testing CheckDefaultDevice" << std::endl; - { - sycl::queue q = syclcompat::get_default_queue(); - assert(q.get_device() == sycl::device{sycl::default_selector_v}); - } +void test_check_default_device() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + sycl::queue q = syclcompat::get_default_queue(); + assert(q.get_device() == sycl::device{sycl::default_selector_v}); +} - // Check behaviour of in order & out of order queue construction - std::cout << "Testing QueuePropOrder" << std::endl; - { - sycl::queue q_create_def{syclcompat::create_queue()}; - assert(q_create_def.is_in_order()); - sycl::queue q_in_order{syclcompat::create_queue(false, true)}; - assert(q_in_order.is_in_order()); - sycl::queue q_out_order{syclcompat::create_queue(false, false)}; - assert(!q_out_order.is_in_order()); - } +// Check behaviour of in order & out of order queue construction +void test_create_queue_arguments() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + sycl::queue q_create_def{syclcompat::create_queue()}; + assert(q_create_def.is_in_order()); + sycl::queue q_in_order{syclcompat::create_queue(false, true)}; + assert(q_in_order.is_in_order()); + sycl::queue q_out_order{syclcompat::create_queue(false, false)}; + assert(!q_out_order.is_in_order()); +} - /* - Device Extension Tests - */ - std::cout << "Testing DeviceExtAPI" << std::endl; - { - DeviceExtFixt dev_ext; - auto &dev_ = dev_ext.get_dev_ext(); - dev_.is_native_host_atomic_supported(); - dev_.get_major_version(); - dev_.get_minor_version(); - dev_.get_max_compute_units(); - dev_.get_max_clock_frequency(); - dev_.get_integrated(); - syclcompat::device_info Info; - dev_.get_device_info(Info); - Info = dev_.get_device_info(); - dev_.reset(); - auto QueuePtr = dev_.default_queue(); - dev_.queues_wait_and_throw(); - QueuePtr = dev_.create_queue(); - dev_.destroy_queue(QueuePtr); - QueuePtr = dev_.create_queue(); - dev_.set_saved_queue(QueuePtr); - QueuePtr = dev_.get_saved_queue(); - auto Context = dev_.get_context(); - } +/* + Device Extension Tests +*/ +void test_device_ext_api() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + DeviceExtFixt dev_ext; + auto &dev_ = dev_ext.get_dev_ext(); + dev_.is_native_host_atomic_supported(); + dev_.get_major_version(); + dev_.get_minor_version(); + dev_.get_max_compute_units(); + dev_.get_max_clock_frequency(); + dev_.get_integrated(); + syclcompat::device_info Info; + dev_.get_device_info(Info); + Info = dev_.get_device_info(); + dev_.reset(); + auto QueuePtr = dev_.default_queue(); + dev_.queues_wait_and_throw(); + QueuePtr = dev_.create_queue(); + dev_.destroy_queue(QueuePtr); + QueuePtr = dev_.create_queue(); + dev_.set_saved_queue(QueuePtr); + QueuePtr = dev_.get_saved_queue(); + auto Context = dev_.get_context(); +} - std::cout << "Testing DefaultSavedQueue" << std::endl; - { - DeviceExtFixt dev_ext; - auto &dev_ = dev_ext.get_dev_ext(); - assert(*dev_.default_queue() == *dev_.get_saved_queue()); - } +void test_default_saved_queue() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + DeviceExtFixt dev_ext; + auto &dev_ = dev_ext.get_dev_ext(); + assert(*dev_.default_queue() == *dev_.get_saved_queue()); +} - std::cout << "Testing SavedQueue" << std::endl; - { - DeviceExtFixt dev_ext; - auto &dev_ = dev_ext.get_dev_ext(); - auto q = *dev_.create_queue(); - dev_.set_saved_queue(&q); - assert(q == *dev_.get_saved_queue()); - } +void test_saved_queue() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + DeviceExtFixt dev_ext; + auto &dev_ = dev_ext.get_dev_ext(); + auto q = *dev_.create_queue(); + dev_.set_saved_queue(&q); + assert(q == *dev_.get_saved_queue()); +} - // Check reset() resets the queues etc - std::cout << "Testing Reset" << std::endl; - { - DeviceExtFixt dev_ext; - auto &dev_ = dev_ext.get_dev_ext(); - auto q = *dev_.create_queue(); - dev_.set_saved_queue(&q); - dev_.reset(); - assert(q != *dev_.get_saved_queue()); - } +// Check reset() resets the queues etc +void test_reset() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + DeviceExtFixt dev_ext; + auto &dev_ = dev_ext.get_dev_ext(); + auto q = *dev_.create_queue(); + dev_.set_saved_queue(&q); + dev_.reset(); + assert(q != *dev_.get_saved_queue()); +} - std::cout << "Testing DeviceInfoAPI" << std::endl; - { - syclcompat::device_info Info; - const char *Name = "DEVNAME"; - Info.set_name(Name); - sycl::id<3> max_work_item_sizes; - Info.set_max_work_item_sizes(max_work_item_sizes); - Info.set_major_version(1); - Info.set_minor_version(1); - Info.set_integrated(1); - Info.set_max_clock_frequency(1000); - Info.set_max_compute_units(32); - Info.set_global_mem_size(1000); - Info.set_local_mem_size(1000); - Info.set_max_work_group_size(32); - Info.set_max_sub_group_size(16); - Info.set_max_work_items_per_compute_unit(16); - int SizeArray[3] = {1, 2, 3}; - Info.set_max_nd_range_size(SizeArray); - - assert(!strcmp(Info.get_name(), Name)); - assert(Info.get_max_work_item_sizes() == max_work_item_sizes); - assert(Info.get_minor_version() == 1); - assert(Info.get_integrated() == 1); - assert(Info.get_max_clock_frequency() == 1000); - assert(Info.get_max_compute_units() == 32); - assert(Info.get_max_work_group_size() == 32); - assert(Info.get_max_sub_group_size() == 16); - assert(Info.get_max_work_items_per_compute_unit() == 16); - assert(Info.get_max_nd_range_size()[0] == SizeArray[0]); - assert(Info.get_max_nd_range_size()[1] == SizeArray[1]); - assert(Info.get_max_nd_range_size()[2] == SizeArray[2]); - assert(Info.get_global_mem_size() == 1000); - assert(Info.get_local_mem_size() == 1000); - } +void test_reset_arguments() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + DeviceExtFixt dev_ext; + auto &dev_ = dev_ext.get_dev_ext(); + + dev_.reset(false, false); + assert(!dev_.get_saved_queue()->is_in_order()); + + dev_.reset(false, true); + assert(dev_.get_saved_queue()->is_in_order()); +} + +void test_device_info_api() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + syclcompat::device_info Info; + const char *Name = "DEVNAME"; + std::array uuid; + uuid.fill('0'); + sycl::range<3> max_work_item_sizes; + + Info.set_name(Name); + Info.set_max_work_item_sizes(max_work_item_sizes); + Info.set_major_version(1); + Info.set_minor_version(1); + Info.set_integrated(1); + Info.set_max_clock_frequency(1000); + Info.set_max_compute_units(32); + Info.set_global_mem_size(1000); + Info.set_local_mem_size(1000); + Info.set_max_work_group_size(32); + Info.set_max_sub_group_size(16); + Info.set_max_work_items_per_compute_unit(16); + int SizeArray[3] = {1, 2, 3}; + Info.set_max_nd_range_size(SizeArray); + + Info.set_host_unified_memory(true); + Info.set_memory_clock_rate(1000); + Info.set_max_register_size_per_work_group(1000); + Info.set_device_id(0); + Info.set_uuid(uuid); + Info.set_global_mem_cache_size(1000); + + assert(!strcmp(Info.get_name(), Name)); + assert(Info.get_max_work_item_sizes() == max_work_item_sizes); + assert(Info.get_minor_version() == 1); + assert(Info.get_integrated() == 1); + assert(Info.get_max_clock_frequency() == 1000); + assert(Info.get_max_compute_units() == 32); + assert(Info.get_max_work_group_size() == 32); + assert(Info.get_max_sub_group_size() == 16); + assert(Info.get_max_work_items_per_compute_unit() == 16); + assert(Info.get_max_nd_range_size()[0] == SizeArray[0]); + assert(Info.get_max_nd_range_size()[1] == SizeArray[1]); + assert(Info.get_max_nd_range_size()[2] == SizeArray[2]); + assert(Info.get_global_mem_size() == 1000); + assert(Info.get_local_mem_size() == 1000); + + uuid.fill('0'); // set_uuid uses std::move + assert(Info.get_host_unified_memory()); + assert(Info.get_memory_clock_rate() == 1000); + assert(Info.get_max_register_size_per_work_group() == 1000); + assert(Info.get_device_id() == 0); + assert(Info.get_uuid() == uuid); + assert(Info.get_global_mem_cache_size() == 1000); +} + +int main() { + test_at_least_one_device(); + test_matches_id(); + test_not_enough_devices(); + test_default_context(); + test_make_in_order_queue(); + test_check_default_device(); + test_create_queue_arguments(); + test_device_ext_api(); + test_default_saved_queue(); + test_saved_queue(); + test_reset(); + test_device_info_api(); return 0; } diff --git a/sycl/test-e2e/syclcompat/device/device_threaded.cpp b/sycl/test-e2e/syclcompat/device/device_threaded.cpp index a21a375427d15..c909a507d0740 100644 --- a/sycl/test-e2e/syclcompat/device/device_threaded.cpp +++ b/sycl/test-e2e/syclcompat/device/device_threaded.cpp @@ -38,34 +38,39 @@ #include "device_fixt.hpp" -int main() { - // Check a thread is able to select a non-default device - std::cout << "Testing DeviceSelect" << std::endl; - { - DeviceTestsFixt dtf; - if (dtf.get_n_devices() > 1) { - constexpr unsigned int TARGET_DEV = 1; - unsigned int thread_dev_id{}; - std::thread other_thread{[&]() { - syclcompat::select_device(TARGET_DEV); - thread_dev_id = syclcompat::get_current_device_id(); - }}; - other_thread.join(); - assert(thread_dev_id == TARGET_DEV); - } else { - std::cout << " Skipping, only doable with multiple devices" << std::endl; - } - } +// Check a thread is able to select a non-default device +void test_device_select() { + std::cout << __PRETTY_FUNCTION__ << std::endl; - // Check multiple threads get same device by default - std::cout << "Testing Threads" << std::endl; - { + DeviceTestsFixt dtf; + if (dtf.get_n_devices() > 1) { + constexpr unsigned int TARGET_DEV = 1; unsigned int thread_dev_id{}; - std::thread other_thread{ - [&]() { thread_dev_id = syclcompat::get_current_device_id(); }}; + std::thread other_thread{[&]() { + syclcompat::select_device(TARGET_DEV); + thread_dev_id = syclcompat::get_current_device_id(); + }}; other_thread.join(); - assert(thread_dev_id == syclcompat::get_current_device_id()); + assert(thread_dev_id == TARGET_DEV); + } else { + std::cout << " Skipping, only doable with multiple devices" << std::endl; } +} + +// Check multiple threads get same device by default +void test_threads() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + unsigned int thread_dev_id{}; + std::thread other_thread{ + [&]() { thread_dev_id = syclcompat::get_current_device_id(); }}; + other_thread.join(); + assert(thread_dev_id == syclcompat::get_current_device_id()); +} + +int main() { + test_device_select(); + test_threads(); return 0; } diff --git a/sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp b/sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp index 9726982dafc4e..d8d8b56fa5fb2 100644 --- a/sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp +++ b/sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp @@ -23,7 +23,9 @@ #pragma once #include -#include +#include +#include +#include // Class to launch a kernel and run a lambda on output data template class QueryLauncher { @@ -40,7 +42,7 @@ template class QueryLauncher { : grid_{grid}, threads_{threads}, size_{grid_.size() * threads_.size()}, host_data_(size_) { data_ = (int *)syclcompat::malloc(size_ * sizeof(int)); - syclcompat::memset(data_, 0, size_ * sizeof(int)); + syclcompat::memset(data_, 0, size_); }; ~QueryLauncher() { syclcompat::free(data_); } template diff --git a/sycl/test-e2e/syclcompat/launch/launch.cpp b/sycl/test-e2e/syclcompat/launch/launch.cpp index a54bbb8fc4e43..26bd825fffeca 100644 --- a/sycl/test-e2e/syclcompat/launch/launch.cpp +++ b/sycl/test-e2e/syclcompat/launch/launch.cpp @@ -23,11 +23,12 @@ // RUN: %clangxx -std=c++20 -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out +#include + #include #include #include - -#include +#include #include "../common.hpp" #include "launch_fixt.hpp" diff --git a/sycl/test-e2e/syclcompat/launch/launch_fixt.hpp b/sycl/test-e2e/syclcompat/launch/launch_fixt.hpp index 793a4f2dfa7ec..258178495ec68 100644 --- a/sycl/test-e2e/syclcompat/launch/launch_fixt.hpp +++ b/sycl/test-e2e/syclcompat/launch/launch_fixt.hpp @@ -23,7 +23,9 @@ #pragma once #include -#include + +#include +#include // Struct containing test case data (local & global ranges) template struct RangeParams {