From 0f07c694583805dcb46e735eab55fad8b8ed7581 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 29 Nov 2023 11:25:34 +0000 Subject: [PATCH] [Bindless][Exp] Add device queries for sampled image fetch Added the following queries for device capabilities of fetching sampled images: - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP --- include/ur_api.h | 396 +++++++++--------- include/ur_print.hpp | 90 ++++ scripts/core/EXP-BINDLESS-IMAGES.rst | 8 + scripts/core/exp-bindless-images.yml | 18 + source/adapters/cuda/device.cpp | 24 ++ source/loader/layers/validation/ur_valddi.cpp | 2 +- source/loader/ur_libapi.cpp | 2 +- source/ur_api.cpp | 2 +- tools/urinfo/urinfo.hpp | 18 + 9 files changed, 365 insertions(+), 195 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 7ba79f4e13..4753cc26c2 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1431,197 +1431,209 @@ urDeviceGetSelected( /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device info typedef enum ur_device_info_t { - UR_DEVICE_INFO_TYPE = 0, ///< [::ur_device_type_t] type of the device - UR_DEVICE_INFO_VENDOR_ID = 1, ///< [uint32_t] vendor Id of the device - UR_DEVICE_INFO_DEVICE_ID = 2, ///< [uint32_t] Id of the device - UR_DEVICE_INFO_MAX_COMPUTE_UNITS = 3, ///< [uint32_t] the number of compute units - UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = 4, ///< [uint32_t] max work item dimensions - UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES = 5, ///< [size_t[]] return an array of max work item sizes - UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE = 6, ///< [size_t] max work group size - UR_DEVICE_INFO_SINGLE_FP_CONFIG = 7, ///< [::ur_device_fp_capability_flags_t] single precision floating point - ///< capability - UR_DEVICE_INFO_HALF_FP_CONFIG = 8, ///< [::ur_device_fp_capability_flags_t] half precision floating point - ///< capability - UR_DEVICE_INFO_DOUBLE_FP_CONFIG = 9, ///< [::ur_device_fp_capability_flags_t] double precision floating point - ///< capability - UR_DEVICE_INFO_QUEUE_PROPERTIES = 10, ///< [::ur_queue_flags_t] command queue properties supported by the device - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = 11, ///< [uint32_t] preferred vector width for char - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = 12, ///< [uint32_t] preferred vector width for short - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = 13, ///< [uint32_t] preferred vector width for int - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = 14, ///< [uint32_t] preferred vector width for long - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = 15, ///< [uint32_t] preferred vector width for float - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = 16, ///< [uint32_t] preferred vector width for double - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = 17, ///< [uint32_t] preferred vector width for half float - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = 18, ///< [uint32_t] native vector width for char - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = 19, ///< [uint32_t] native vector width for short - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = 20, ///< [uint32_t] native vector width for int - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = 21, ///< [uint32_t] native vector width for long - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = 22, ///< [uint32_t] native vector width for float - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = 23, ///< [uint32_t] native vector width for double - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = 24, ///< [uint32_t] native vector width for half float - UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY = 25, ///< [uint32_t] max clock frequency in MHz - UR_DEVICE_INFO_MEMORY_CLOCK_RATE = 26, ///< [uint32_t] memory clock frequency in MHz - UR_DEVICE_INFO_ADDRESS_BITS = 27, ///< [uint32_t] address bits - UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = 28, ///< [uint64_t] max memory allocation size - UR_DEVICE_INFO_IMAGE_SUPPORTED = 29, ///< [::ur_bool_t] images are supported - UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS = 30, ///< [uint32_t] max number of image objects arguments of a kernel declared - ///< with the read_only qualifier - UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = 31, ///< [uint32_t] max number of image objects arguments of a kernel declared - ///< with the write_only qualifier - UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS = 32, ///< [uint32_t] max number of image objects arguments of a kernel declared - ///< with the read_write qualifier - UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH = 33, ///< [size_t] max width of Image2D object - UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = 34, ///< [size_t] max height of Image2D object - UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH = 35, ///< [size_t] max width of Image3D object - UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = 36, ///< [size_t] max height of Image3D object - UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH = 37, ///< [size_t] max depth of Image3D object - UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = 38, ///< [size_t] max image buffer size - UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = 39, ///< [size_t] max image array size - UR_DEVICE_INFO_MAX_SAMPLERS = 40, ///< [uint32_t] max number of samplers that can be used in a kernel - UR_DEVICE_INFO_MAX_PARAMETER_SIZE = 41, ///< [size_t] max size in bytes of all arguments passed to a kernel - UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = 42, ///< [uint32_t] memory base address alignment - UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = 43, ///< [::ur_device_mem_cache_type_t] global memory cache type - UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = 44, ///< [uint32_t] global memory cache line size in bytes - UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = 45, ///< [uint64_t] size of global memory cache in bytes - UR_DEVICE_INFO_GLOBAL_MEM_SIZE = 46, ///< [uint64_t] size of global memory in bytes - UR_DEVICE_INFO_GLOBAL_MEM_FREE = 47, ///< [uint64_t] size of global memory which is free in bytes - UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = 48, ///< [uint64_t] max constant buffer size in bytes - UR_DEVICE_INFO_MAX_CONSTANT_ARGS = 49, ///< [uint32_t] max number of __const declared arguments in a kernel - UR_DEVICE_INFO_LOCAL_MEM_TYPE = 50, ///< [::ur_device_local_mem_type_t] local memory type - UR_DEVICE_INFO_LOCAL_MEM_SIZE = 51, ///< [uint64_t] local memory size in bytes - UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = 52, ///< [::ur_bool_t] support error correction to global and local memory - UR_DEVICE_INFO_HOST_UNIFIED_MEMORY = 53, ///< [::ur_bool_t] unified host device memory - UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = 54, ///< [size_t] profiling timer resolution in nanoseconds - UR_DEVICE_INFO_ENDIAN_LITTLE = 55, ///< [::ur_bool_t] little endian byte order - UR_DEVICE_INFO_AVAILABLE = 56, ///< [::ur_bool_t] device is available - UR_DEVICE_INFO_COMPILER_AVAILABLE = 57, ///< [::ur_bool_t] device compiler is available - UR_DEVICE_INFO_LINKER_AVAILABLE = 58, ///< [::ur_bool_t] device linker is available - UR_DEVICE_INFO_EXECUTION_CAPABILITIES = 59, ///< [::ur_device_exec_capability_flags_t] device kernel execution - ///< capability bit-field - UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = 60, ///< [::ur_queue_flags_t] device command queue property bit-field - UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = 61, ///< [::ur_queue_flags_t] host queue property bit-field - UR_DEVICE_INFO_BUILT_IN_KERNELS = 62, ///< [char[]] a semi-colon separated list of built-in kernels - UR_DEVICE_INFO_PLATFORM = 63, ///< [::ur_platform_handle_t] the platform associated with the device - UR_DEVICE_INFO_REFERENCE_COUNT = 64, ///< [uint32_t] Reference count of the device object. - ///< The reference count returned should be considered immediately stale. - ///< It is unsuitable for general use in applications. This feature is - ///< provided for identifying memory leaks. - UR_DEVICE_INFO_IL_VERSION = 65, ///< [char[]] IL version - UR_DEVICE_INFO_NAME = 66, ///< [char[]] Device name - UR_DEVICE_INFO_VENDOR = 67, ///< [char[]] Device vendor - UR_DEVICE_INFO_DRIVER_VERSION = 68, ///< [char[]] Driver version - UR_DEVICE_INFO_PROFILE = 69, ///< [char[]] Device profile - UR_DEVICE_INFO_VERSION = 70, ///< [char[]] Device version - UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION = 71, ///< [char[]] Version of backend runtime - UR_DEVICE_INFO_EXTENSIONS = 72, ///< [char[]] Return a space separated list of extension names - UR_DEVICE_INFO_PRINTF_BUFFER_SIZE = 73, ///< [size_t] Maximum size in bytes of internal printf buffer - UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = 74, ///< [::ur_bool_t] prefer user synchronization when sharing object with - ///< other API - UR_DEVICE_INFO_PARENT_DEVICE = 75, ///< [::ur_device_handle_t] return parent device handle - UR_DEVICE_INFO_SUPPORTED_PARTITIONS = 76, ///< [::ur_device_partition_t[]] Returns an array of partition types - ///< supported by the device - UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = 77, ///< [uint32_t] maximum number of sub-devices when the device is - ///< partitioned - UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = 78, ///< [::ur_device_affinity_domain_flags_t] Returns a bit-field of the - ///< supported affinity domains for partitioning. - ///< If the device does not support any affinity domains, then 0 will be returned. - UR_DEVICE_INFO_PARTITION_TYPE = 79, ///< [::ur_device_partition_property_t[]] return an array of - ///< ::ur_device_partition_property_t for properties specified in - ///< ::urDevicePartition - UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups - UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress - UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of sub group sizes supported on Intel - ///< device - UR_DEVICE_INFO_USM_HOST_SUPPORT = 83, ///< [::ur_device_usm_access_capability_flags_t] support USM host memory - ///< access - UR_DEVICE_INFO_USM_DEVICE_SUPPORT = 84, ///< [::ur_device_usm_access_capability_flags_t] support USM device memory - ///< access - UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = 85, ///< [::ur_device_usm_access_capability_flags_t] support USM single device - ///< shared memory access - UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = 86, ///< [::ur_device_usm_access_capability_flags_t] support USM cross device - ///< shared memory access - UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = 87, ///< [::ur_device_usm_access_capability_flags_t] support USM system wide - ///< shared memory access - UR_DEVICE_INFO_UUID = 88, ///< [char[]] return device UUID - UR_DEVICE_INFO_PCI_ADDRESS = 89, ///< [char[]] return device PCI address - UR_DEVICE_INFO_GPU_EU_COUNT = 90, ///< [uint32_t] return Intel GPU EU count - UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH = 91, ///< [uint32_t] return Intel GPU EU SIMD width - UR_DEVICE_INFO_GPU_EU_SLICES = 92, ///< [uint32_t] return Intel GPU number of slices - UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 93, ///< [uint32_t] return Intel GPU EU count per subslice - UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE = 94, ///< [uint32_t] return Intel GPU number of subslices per slice - UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 95, ///< [uint32_t] return Intel GPU number of threads per EU - UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH = 96, ///< [uint32_t] return max memory bandwidth in Mb/s - UR_DEVICE_INFO_IMAGE_SRGB = 97, ///< [::ur_bool_t] device supports sRGB images - UR_DEVICE_INFO_BUILD_ON_SUBDEVICE = 98, ///< [::ur_bool_t] Return true if sub-device should do its own program - ///< build - UR_DEVICE_INFO_ATOMIC_64 = 99, ///< [::ur_bool_t] support 64 bit atomics - UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 100, ///< [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ///< memory order capabilities - UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 101, ///< [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ///< memory scope capabilities - UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 102, ///< [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ///< memory fence order capabilities - UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 103, ///< [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ///< memory fence scope capabilities - UR_DEVICE_INFO_BFLOAT16 = 104, ///< [::ur_bool_t] support for bfloat16 - UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 105, ///< [uint32_t] Returns 1 if the device doesn't have a notion of a - ///< queue index. Otherwise, returns the number of queue indices that are - ///< available for this device. - UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS = 106, ///< [::ur_bool_t] support the ::urKernelSetSpecializationConstants entry - ///< point - UR_DEVICE_INFO_MEMORY_BUS_WIDTH = 107, ///< [uint32_t] return the width in bits of the memory bus interface of the - ///< device. - UR_DEVICE_INFO_MAX_WORK_GROUPS_3D = 108, ///< [size_t[3]] return max 3D work groups - UR_DEVICE_INFO_ASYNC_BARRIER = 109, ///< [::ur_bool_t] return true if Async Barrier is supported - UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT = 110, ///< [::ur_bool_t] return true if specifying memory channels is supported - UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED = 111, ///< [::ur_bool_t] Return true if the device supports enqueueing commands - ///< to read and write pipes from the host. - UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP = 112, ///< [uint32_t] The maximum number of registers available per block. - UR_DEVICE_INFO_IP_VERSION = 113, ///< [uint32_t] The device IP version. The meaning of the device IP version - ///< is implementation-defined, but newer devices should have a higher - ///< version than older devices. - UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT = 114, ///< [::ur_bool_t] return true if the device supports virtual memory. - UR_DEVICE_INFO_ESIMD_SUPPORT = 115, ///< [::ur_bool_t] return true if the device supports ESIMD. - UR_DEVICE_INFO_COMPONENT_DEVICES = 116, ///< [::ur_device_handle_t[]] The set of component devices contained by - ///< this composite device. - UR_DEVICE_INFO_COMPOSITE_DEVICE = 117, ///< [::ur_device_handle_t] The composite device containing this component - ///< device. - UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of - ///< command-buffers. - UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel - ///< commands in a command-buffer. - UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of - ///< bindless images - UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of - ///< bindless images backed by shared USM - UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP = 0x2002, ///< [::ur_bool_t] returns true if the device supports the creation of 1D - ///< bindless images backed by USM - UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP = 0x2003, ///< [::ur_bool_t] returns true if the device supports the creation of 2D - ///< bindless images backed by USM - UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP = 0x2004, ///< [uint32_t] returns the required alignment of the pitch between two - ///< rows of an image in bytes - UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP = 0x2005, ///< [size_t] returns the maximum linear width allowed for images allocated - ///< using USM - UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP = 0x2006, ///< [size_t] returns the maximum linear height allowed for images - ///< allocated using USM - UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP = 0x2007, ///< [size_t] returns the maximum linear pitch allowed for images allocated - ///< using USM - UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP = 0x2008, ///< [::ur_bool_t] returns true if the device supports allocating mipmap - ///< resources - UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP = 0x2009, ///< [::ur_bool_t] returns true if the device supports sampling mipmap - ///< images with anisotropic filtering - UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP = 0x200A, ///< [uint32_t] returns the maximum anisotropic ratio supported by the - ///< device - UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP = 0x200B, ///< [::ur_bool_t] returns true if the device supports using images created - ///< from individual mipmap levels - UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP = 0x200C, ///< [::ur_bool_t] returns true if the device supports importing external - ///< memory resources - UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP = 0x200D, ///< [::ur_bool_t] returns true if the device supports exporting internal - ///< memory resources - UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP = 0x200E, ///< [::ur_bool_t] returns true if the device supports importing external - ///< semaphore resources - UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP = 0x200F, ///< [::ur_bool_t] returns true if the device supports exporting internal - ///< event resources + UR_DEVICE_INFO_TYPE = 0, ///< [::ur_device_type_t] type of the device + UR_DEVICE_INFO_VENDOR_ID = 1, ///< [uint32_t] vendor Id of the device + UR_DEVICE_INFO_DEVICE_ID = 2, ///< [uint32_t] Id of the device + UR_DEVICE_INFO_MAX_COMPUTE_UNITS = 3, ///< [uint32_t] the number of compute units + UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = 4, ///< [uint32_t] max work item dimensions + UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES = 5, ///< [size_t[]] return an array of max work item sizes + UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE = 6, ///< [size_t] max work group size + UR_DEVICE_INFO_SINGLE_FP_CONFIG = 7, ///< [::ur_device_fp_capability_flags_t] single precision floating point + ///< capability + UR_DEVICE_INFO_HALF_FP_CONFIG = 8, ///< [::ur_device_fp_capability_flags_t] half precision floating point + ///< capability + UR_DEVICE_INFO_DOUBLE_FP_CONFIG = 9, ///< [::ur_device_fp_capability_flags_t] double precision floating point + ///< capability + UR_DEVICE_INFO_QUEUE_PROPERTIES = 10, ///< [::ur_queue_flags_t] command queue properties supported by the device + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = 11, ///< [uint32_t] preferred vector width for char + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = 12, ///< [uint32_t] preferred vector width for short + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = 13, ///< [uint32_t] preferred vector width for int + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = 14, ///< [uint32_t] preferred vector width for long + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = 15, ///< [uint32_t] preferred vector width for float + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = 16, ///< [uint32_t] preferred vector width for double + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = 17, ///< [uint32_t] preferred vector width for half float + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = 18, ///< [uint32_t] native vector width for char + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = 19, ///< [uint32_t] native vector width for short + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = 20, ///< [uint32_t] native vector width for int + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = 21, ///< [uint32_t] native vector width for long + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = 22, ///< [uint32_t] native vector width for float + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = 23, ///< [uint32_t] native vector width for double + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = 24, ///< [uint32_t] native vector width for half float + UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY = 25, ///< [uint32_t] max clock frequency in MHz + UR_DEVICE_INFO_MEMORY_CLOCK_RATE = 26, ///< [uint32_t] memory clock frequency in MHz + UR_DEVICE_INFO_ADDRESS_BITS = 27, ///< [uint32_t] address bits + UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = 28, ///< [uint64_t] max memory allocation size + UR_DEVICE_INFO_IMAGE_SUPPORTED = 29, ///< [::ur_bool_t] images are supported + UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS = 30, ///< [uint32_t] max number of image objects arguments of a kernel declared + ///< with the read_only qualifier + UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = 31, ///< [uint32_t] max number of image objects arguments of a kernel declared + ///< with the write_only qualifier + UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS = 32, ///< [uint32_t] max number of image objects arguments of a kernel declared + ///< with the read_write qualifier + UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH = 33, ///< [size_t] max width of Image2D object + UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = 34, ///< [size_t] max height of Image2D object + UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH = 35, ///< [size_t] max width of Image3D object + UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = 36, ///< [size_t] max height of Image3D object + UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH = 37, ///< [size_t] max depth of Image3D object + UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = 38, ///< [size_t] max image buffer size + UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = 39, ///< [size_t] max image array size + UR_DEVICE_INFO_MAX_SAMPLERS = 40, ///< [uint32_t] max number of samplers that can be used in a kernel + UR_DEVICE_INFO_MAX_PARAMETER_SIZE = 41, ///< [size_t] max size in bytes of all arguments passed to a kernel + UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = 42, ///< [uint32_t] memory base address alignment + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = 43, ///< [::ur_device_mem_cache_type_t] global memory cache type + UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = 44, ///< [uint32_t] global memory cache line size in bytes + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = 45, ///< [uint64_t] size of global memory cache in bytes + UR_DEVICE_INFO_GLOBAL_MEM_SIZE = 46, ///< [uint64_t] size of global memory in bytes + UR_DEVICE_INFO_GLOBAL_MEM_FREE = 47, ///< [uint64_t] size of global memory which is free in bytes + UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = 48, ///< [uint64_t] max constant buffer size in bytes + UR_DEVICE_INFO_MAX_CONSTANT_ARGS = 49, ///< [uint32_t] max number of __const declared arguments in a kernel + UR_DEVICE_INFO_LOCAL_MEM_TYPE = 50, ///< [::ur_device_local_mem_type_t] local memory type + UR_DEVICE_INFO_LOCAL_MEM_SIZE = 51, ///< [uint64_t] local memory size in bytes + UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = 52, ///< [::ur_bool_t] support error correction to global and local memory + UR_DEVICE_INFO_HOST_UNIFIED_MEMORY = 53, ///< [::ur_bool_t] unified host device memory + UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = 54, ///< [size_t] profiling timer resolution in nanoseconds + UR_DEVICE_INFO_ENDIAN_LITTLE = 55, ///< [::ur_bool_t] little endian byte order + UR_DEVICE_INFO_AVAILABLE = 56, ///< [::ur_bool_t] device is available + UR_DEVICE_INFO_COMPILER_AVAILABLE = 57, ///< [::ur_bool_t] device compiler is available + UR_DEVICE_INFO_LINKER_AVAILABLE = 58, ///< [::ur_bool_t] device linker is available + UR_DEVICE_INFO_EXECUTION_CAPABILITIES = 59, ///< [::ur_device_exec_capability_flags_t] device kernel execution + ///< capability bit-field + UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = 60, ///< [::ur_queue_flags_t] device command queue property bit-field + UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = 61, ///< [::ur_queue_flags_t] host queue property bit-field + UR_DEVICE_INFO_BUILT_IN_KERNELS = 62, ///< [char[]] a semi-colon separated list of built-in kernels + UR_DEVICE_INFO_PLATFORM = 63, ///< [::ur_platform_handle_t] the platform associated with the device + UR_DEVICE_INFO_REFERENCE_COUNT = 64, ///< [uint32_t] Reference count of the device object. + ///< The reference count returned should be considered immediately stale. + ///< It is unsuitable for general use in applications. This feature is + ///< provided for identifying memory leaks. + UR_DEVICE_INFO_IL_VERSION = 65, ///< [char[]] IL version + UR_DEVICE_INFO_NAME = 66, ///< [char[]] Device name + UR_DEVICE_INFO_VENDOR = 67, ///< [char[]] Device vendor + UR_DEVICE_INFO_DRIVER_VERSION = 68, ///< [char[]] Driver version + UR_DEVICE_INFO_PROFILE = 69, ///< [char[]] Device profile + UR_DEVICE_INFO_VERSION = 70, ///< [char[]] Device version + UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION = 71, ///< [char[]] Version of backend runtime + UR_DEVICE_INFO_EXTENSIONS = 72, ///< [char[]] Return a space separated list of extension names + UR_DEVICE_INFO_PRINTF_BUFFER_SIZE = 73, ///< [size_t] Maximum size in bytes of internal printf buffer + UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = 74, ///< [::ur_bool_t] prefer user synchronization when sharing object with + ///< other API + UR_DEVICE_INFO_PARENT_DEVICE = 75, ///< [::ur_device_handle_t] return parent device handle + UR_DEVICE_INFO_SUPPORTED_PARTITIONS = 76, ///< [::ur_device_partition_t[]] Returns an array of partition types + ///< supported by the device + UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = 77, ///< [uint32_t] maximum number of sub-devices when the device is + ///< partitioned + UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = 78, ///< [::ur_device_affinity_domain_flags_t] Returns a bit-field of the + ///< supported affinity domains for partitioning. + ///< If the device does not support any affinity domains, then 0 will be returned. + UR_DEVICE_INFO_PARTITION_TYPE = 79, ///< [::ur_device_partition_property_t[]] return an array of + ///< ::ur_device_partition_property_t for properties specified in + ///< ::urDevicePartition + UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups + UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress + UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of sub group sizes supported on Intel + ///< device + UR_DEVICE_INFO_USM_HOST_SUPPORT = 83, ///< [::ur_device_usm_access_capability_flags_t] support USM host memory + ///< access + UR_DEVICE_INFO_USM_DEVICE_SUPPORT = 84, ///< [::ur_device_usm_access_capability_flags_t] support USM device memory + ///< access + UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = 85, ///< [::ur_device_usm_access_capability_flags_t] support USM single device + ///< shared memory access + UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = 86, ///< [::ur_device_usm_access_capability_flags_t] support USM cross device + ///< shared memory access + UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = 87, ///< [::ur_device_usm_access_capability_flags_t] support USM system wide + ///< shared memory access + UR_DEVICE_INFO_UUID = 88, ///< [char[]] return device UUID + UR_DEVICE_INFO_PCI_ADDRESS = 89, ///< [char[]] return device PCI address + UR_DEVICE_INFO_GPU_EU_COUNT = 90, ///< [uint32_t] return Intel GPU EU count + UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH = 91, ///< [uint32_t] return Intel GPU EU SIMD width + UR_DEVICE_INFO_GPU_EU_SLICES = 92, ///< [uint32_t] return Intel GPU number of slices + UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 93, ///< [uint32_t] return Intel GPU EU count per subslice + UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE = 94, ///< [uint32_t] return Intel GPU number of subslices per slice + UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 95, ///< [uint32_t] return Intel GPU number of threads per EU + UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH = 96, ///< [uint32_t] return max memory bandwidth in Mb/s + UR_DEVICE_INFO_IMAGE_SRGB = 97, ///< [::ur_bool_t] device supports sRGB images + UR_DEVICE_INFO_BUILD_ON_SUBDEVICE = 98, ///< [::ur_bool_t] Return true if sub-device should do its own program + ///< build + UR_DEVICE_INFO_ATOMIC_64 = 99, ///< [::ur_bool_t] support 64 bit atomics + UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 100, ///< [::ur_memory_order_capability_flags_t] return a bit-field of atomic + ///< memory order capabilities + UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 101, ///< [::ur_memory_scope_capability_flags_t] return a bit-field of atomic + ///< memory scope capabilities + UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 102, ///< [::ur_memory_order_capability_flags_t] return a bit-field of atomic + ///< memory fence order capabilities + UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 103, ///< [::ur_memory_scope_capability_flags_t] return a bit-field of atomic + ///< memory fence scope capabilities + UR_DEVICE_INFO_BFLOAT16 = 104, ///< [::ur_bool_t] support for bfloat16 + UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 105, ///< [uint32_t] Returns 1 if the device doesn't have a notion of a + ///< queue index. Otherwise, returns the number of queue indices that are + ///< available for this device. + UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS = 106, ///< [::ur_bool_t] support the ::urKernelSetSpecializationConstants entry + ///< point + UR_DEVICE_INFO_MEMORY_BUS_WIDTH = 107, ///< [uint32_t] return the width in bits of the memory bus interface of the + ///< device. + UR_DEVICE_INFO_MAX_WORK_GROUPS_3D = 108, ///< [size_t[3]] return max 3D work groups + UR_DEVICE_INFO_ASYNC_BARRIER = 109, ///< [::ur_bool_t] return true if Async Barrier is supported + UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT = 110, ///< [::ur_bool_t] return true if specifying memory channels is supported + UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED = 111, ///< [::ur_bool_t] Return true if the device supports enqueueing commands + ///< to read and write pipes from the host. + UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP = 112, ///< [uint32_t] The maximum number of registers available per block. + UR_DEVICE_INFO_IP_VERSION = 113, ///< [uint32_t] The device IP version. The meaning of the device IP version + ///< is implementation-defined, but newer devices should have a higher + ///< version than older devices. + UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT = 114, ///< [::ur_bool_t] return true if the device supports virtual memory. + UR_DEVICE_INFO_ESIMD_SUPPORT = 115, ///< [::ur_bool_t] return true if the device supports ESIMD. + UR_DEVICE_INFO_COMPONENT_DEVICES = 116, ///< [::ur_device_handle_t[]] The set of component devices contained by + ///< this composite device. + UR_DEVICE_INFO_COMPOSITE_DEVICE = 117, ///< [::ur_device_handle_t] The composite device containing this component + ///< device. + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of + ///< command-buffers. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel + ///< commands in a command-buffer. + UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of + ///< bindless images + UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of + ///< bindless images backed by shared USM + UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP = 0x2002, ///< [::ur_bool_t] returns true if the device supports the creation of 1D + ///< bindless images backed by USM + UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP = 0x2003, ///< [::ur_bool_t] returns true if the device supports the creation of 2D + ///< bindless images backed by USM + UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP = 0x2004, ///< [uint32_t] returns the required alignment of the pitch between two + ///< rows of an image in bytes + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP = 0x2005, ///< [size_t] returns the maximum linear width allowed for images allocated + ///< using USM + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP = 0x2006, ///< [size_t] returns the maximum linear height allowed for images + ///< allocated using USM + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP = 0x2007, ///< [size_t] returns the maximum linear pitch allowed for images allocated + ///< using USM + UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP = 0x2008, ///< [::ur_bool_t] returns true if the device supports allocating mipmap + ///< resources + UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP = 0x2009, ///< [::ur_bool_t] returns true if the device supports sampling mipmap + ///< images with anisotropic filtering + UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP = 0x200A, ///< [uint32_t] returns the maximum anisotropic ratio supported by the + ///< device + UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP = 0x200B, ///< [::ur_bool_t] returns true if the device supports using images created + ///< from individual mipmap levels + UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP = 0x200C, ///< [::ur_bool_t] returns true if the device supports importing external + ///< memory resources + UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP = 0x200D, ///< [::ur_bool_t] returns true if the device supports exporting internal + ///< memory resources + UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP = 0x200E, ///< [::ur_bool_t] returns true if the device supports importing external + ///< semaphore resources + UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP = 0x200F, ///< [::ur_bool_t] returns true if the device supports exporting internal + ///< event resources + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP = 0x2010, ///< [::ur_bool_t] returns true if the device is capable of fetching USM + ///< backed 1D sampled image data. + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP = 0x2011, ///< [::ur_bool_t] returns true if the device is capable of fetching + ///< non-USM backed 1D sampled image data. + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP = 0x2012, ///< [::ur_bool_t] returns true if the device is capable of fetching USM + ///< backed 2D sampled image data. + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP = 0x2013, ///< [::ur_bool_t] returns true if the device is capable of fetching + ///< non-USM backed 2D sampled image data. + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP = 0x2014, ///< [::ur_bool_t] returns true if the device is capable of fetching USM + ///< backed 3D sampled image data. + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP = 0x2015, ///< [::ur_bool_t] returns true if the device is capable of fetching + ///< non-USM backed 3D sampled image data. /// @cond UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1647,7 +1659,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 35f0f2e9df..6aebe462f6 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -2537,6 +2537,24 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP: os << "UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP"; break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP: + os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP"; + break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP: + os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP"; + break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP: + os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP"; + break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP: + os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP"; + break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP: + os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP"; + break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP: + os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP"; + break; default: os << "unknown enumerator"; break; @@ -4141,6 +4159,78 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/scripts/core/EXP-BINDLESS-IMAGES.rst b/scripts/core/EXP-BINDLESS-IMAGES.rst index fe6a1ac32b..021c76ca0f 100644 --- a/scripts/core/EXP-BINDLESS-IMAGES.rst +++ b/scripts/core/EXP-BINDLESS-IMAGES.rst @@ -87,6 +87,12 @@ Enums * ${X}_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP * ${X}_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP * ${X}_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP + * ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP + * ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP + * ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP + * ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP + * ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP + * ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP * ${x}_command_t * ${X}_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP @@ -184,6 +190,8 @@ Changelog +------------------------------------------------------------------------+ | 9.0 | Remove layered image properties struct. | +------------------------------------------------------------------------+ +| 10.0 | Added device queries for sampled image fetch capabilities. | ++----------+-------------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-bindless-images.yml b/scripts/core/exp-bindless-images.yml index d2e508c4a7..078061a0ad 100644 --- a/scripts/core/exp-bindless-images.yml +++ b/scripts/core/exp-bindless-images.yml @@ -86,6 +86,24 @@ etors: - name: INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP value: "0x200F" desc: "[$x_bool_t] returns true if the device supports exporting internal event resources" + - name: BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP + value: "0x2010" + desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 1D sampled image data." + - name: BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP + value: "0x2011" + desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 1D sampled image data." + - name: BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP + value: "0x2012" + desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 2D sampled image data." + - name: BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP + value: "0x2013" + desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 2D sampled image data." + - name: BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP + value: "0x2014" + desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 3D sampled image data." + - name: BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP + value: "0x2015" + desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 3D sampled image data." --- #-------------------------------------------------------------------------- type: enum extend: true diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index b33ad6c792..39397e6d77 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -902,6 +902,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // CUDA does not support exporting semaphores or events. return ReturnValue(false); } + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP: { + // CUDA does support fetching 1D USM sampled image data. + return ReturnValue(true); + } + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP: { + // CUDA does not support fetching 1D non-USM sampled image data. + return ReturnValue(false); + } + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP: { + // CUDA does support fetching 2D USM sampled image data. + return ReturnValue(true); + } + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP: { + // CUDA does support fetching 2D non-USM sampled image data. + return ReturnValue(true); + } + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP: { + // CUDA does not support 3D USM sampled textures + return ReturnValue(false); + } + case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP: { + // CUDA does support fetching 3D non-USM sampled image data. + return ReturnValue(true); + } case UR_DEVICE_INFO_DEVICE_ID: { int Value = 0; UR_CHECK_ERROR(cuDeviceGetAttribute( diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 1ad5cede4d..0001652efa 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -496,7 +496,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName) { + if (UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 7cbbdffb1c..85ad6c3d60 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -842,7 +842,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 665e75548b..9f659baf63 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -736,7 +736,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index 15894cafb8..55c5e0a1d5 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -378,5 +378,23 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo( hDevice, UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP); } } // namespace urinfo