From 3ea1db3876413a680e6f8f62d2710407bb2480d4 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 21 Nov 2023 12:13:32 +0000 Subject: [PATCH 01/34] [Bindless][Exp] Add texture fetch functionality With this commit, it is now possible to "fetch" sampled image data by passing integer coordinates to the `read_image` funtion. --- libclc/ptx-nvidiacl/libspirv/images/image.cl | 245 ++++++++++++++++++ .../libspirv/images/image_helpers.ll | 74 +++++- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 12 +- .../sycl_ext_oneapi_bindless_images.asciidoc | 47 +++- sycl/include/CL/__spirv/spirv_ops.hpp | 3 + sycl/include/sycl/detail/image_ocl_types.hpp | 12 + sycl/include/sycl/detail/pi.h | 16 +- sycl/include/sycl/device_aspect_macros.hpp | 83 +++++- .../sycl/ext/oneapi/bindless_images.hpp | 54 +++- sycl/include/sycl/info/aspects.def | 16 +- sycl/plugins/unified_runtime/CMakeLists.txt | 22 +- sycl/plugins/unified_runtime/pi2ur.hpp | 18 ++ sycl/source/detail/device_impl.cpp | 51 ++++ .../bindless_images/image_get_info.cpp | 27 +- .../sampled_fetch/fetch_1D_USM.cpp | 104 ++++++++ .../sampled_fetch/fetch_2D.cpp | 125 +++++++++ .../sampled_fetch/fetch_2D_USM.cpp | 132 ++++++++++ .../sampled_fetch/fetch_3D.cpp | 129 +++++++++ sycl/test-e2e/bindless_images/sampling_1D.cpp | 2 +- 19 files changed, 1130 insertions(+), 42 deletions(-) create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl index f55f0c435cf35..67553b03c2f84 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image.cl +++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl @@ -1837,6 +1837,8 @@ _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(half4, 3, Dv4_i, Dv4_DF16_, v4f16, int4 // <--- TEXTURES ---> +// <--- Texture sampling (float coords) ---> + // Int int4 __nvvm_tex_1d_v4i32_f32(unsigned long, float) __asm("__clc_llvm_nvvm_tex_1d_v4i32_f32"); @@ -2270,6 +2272,249 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(half4, 1, Dv4_DF16_, v4f16, f, fl _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_f, float2 coord, coord.x, coord.y) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_f, float4 coord, coord.x, coord.y, coord.z) +// <--- Texture fetching (integer coords) ---> + +// Int +int4 __nvvm_tex_1d_v4i32_i32(unsigned long, + int) __asm("__clc_llvm_nvvm_tex_1d_v4i32_s32"); +int4 __nvvm_tex_2d_v4i32_i32(unsigned long, int, + int) __asm("__clc_llvm_nvvm_tex_2d_v4i32_s32"); +int4 __nvvm_tex_3d_v4i32_i32(unsigned long, int, int, + int) __asm("__clc_llvm_nvvm_tex_3d_v4i32_s32"); + +// Unsigned int +uint4 __nvvm_tex_1d_v4j32_i32(unsigned long, + int) __asm("__clc_llvm_nvvm_tex_1d_v4j32_s32"); +uint4 __nvvm_tex_2d_v4j32_i32(unsigned long, int, + int) __asm("__clc_llvm_nvvm_tex_2d_v4j32_s32"); +uint4 __nvvm_tex_3d_v4j32_i32(unsigned long, int, int, + int) __asm("__clc_llvm_nvvm_tex_3d_v4j32_s32"); + +// Float +float4 __nvvm_tex_1d_v4f32_i32(unsigned long, + int) __asm("__clc_llvm_nvvm_tex_1d_v4f32_s32"); +float4 __nvvm_tex_2d_v4f32_i32(unsigned long, int, + int) __asm("__clc_llvm_nvvm_tex_2d_v4f32_s32"); +float4 __nvvm_tex_3d_v4f32_i32(unsigned long, int, int, + int) __asm("__clc_llvm_nvvm_tex_3d_v4f32_s32"); + +// Macro to generate texture vec4 fetches +#define _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN( \ + elem_t, fetch_elem_t, dimension, vec_size, fetch_vec_size, coord_input, \ + coord_parameter) \ + elem_t##4 __nvvm_tex_##dimension##d_##vec_size##_i32( \ + unsigned long imageHandle, coord_input) { \ + fetch_elem_t##4 a = __nvvm_tex_##dimension##d_##fetch_vec_size##_i32( \ + imageHandle, coord_parameter); \ + return cast_##fetch_elem_t##4_to_##elem_t##4(a); \ + } + +// Macro to generate texture vec2 fetches +#define _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN( \ + elem_t, fetch_elem_t, dimension, vec_size, fetch_vec_size, coord_input, \ + coord_parameter) \ + elem_t##2 __nvvm_tex_##dimension##d_##vec_size##_i32( \ + unsigned long imageHandle, coord_input) { \ + fetch_elem_t##4 a = __nvvm_tex_##dimension##d_##fetch_vec_size##_i32( \ + imageHandle, coord_parameter); \ + return cast_##fetch_elem_t##4_to_##elem_t##2(a); \ + } + +// Macro to generate texture singular data type fetches +#define _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN( \ + elem_t, fetch_elem_t, dimension, vec_size, fetch_vec_size, coord_input, \ + coord_parameter) \ + elem_t __nvvm_tex_##dimension##d_##vec_size##_i32(unsigned long imageHandle, \ + coord_input) { \ + return (elem_t)__nvvm_tex_##dimension##d_##fetch_vec_size##_i32( \ + imageHandle, coord_parameter)[0]; \ + } + +#define COORD_INPUT_1D int x +#define COORD_INPUT_2D int x, int y +#define COORD_INPUT_3D int x, int y, int z + +#define COORD_PARAMS_1D x +#define COORD_PARAMS_2D x, y +#define COORD_PARAMS_3D x, y, z + +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v4i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v4i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v4i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v4t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v4t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v4t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v4i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v4i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v4i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v4h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v4h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v4h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v4f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v4f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v4f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) + +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, v2f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, v2f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, v2f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, v2i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, v2i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, v2i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, v2j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, v2j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, v2j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v2i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v2i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v2i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v2t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v2t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v2t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v2i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v2i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v2i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v2h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v2h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v2h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v2f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v2f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v2f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) + +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) + + +#undef COORD_INPUT_1D +#undef COORD_INPUT_2D +#undef COORD_INPUT_3D + +#undef COORD_PARAMS_1D +#undef COORD_PARAMS_2D +#undef COORD_PARAMS_3D + +#undef _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN +#undef _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN +#undef _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN( \ + elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input, \ + ...) \ + _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE( \ + 18, __spirv_ImageFetch, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_input) { \ + return __nvvm_tex_##dimension##d_##vec_size##_i32(imageHandle, \ + __VA_ARGS__); \ + } + +// Int +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 1, i, i32, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) + +// Unsigned int +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 1, j, j32, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 2, j, j32, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 3, j, j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) + +// Short +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 1, s, i16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) + +// Unsigned short +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 1, t, t16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) + +// Char +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 1, a, i8, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) + +// Unsigned Char +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 1, h, h8, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) + +// Float +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 1, f, f32, i, uint x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 2, f, f32, Dv2_i, uint2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 1, Dv2_f, v2f32, i, uint x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, S0_, uint2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 1, Dv4_f, v4f32, i, uint x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, uint2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, S0_, uint4 coord, coord.x, coord.y, coord.z) + +// Half +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 1, DF16_, f16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) + // <--- MIPMAP ---> diff --git a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll index fdc7833275234..c81de52c86ba1 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll +++ b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll @@ -264,7 +264,7 @@ entry: ret <4 x i16> %1 } -; <--- TEXTURES ---> +; <--- TEXTURE SAMPLING (floating-point coordinates) ---> declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4s32.f32(i64, float) define <4 x i32> @__clc_llvm_nvvm_tex_1d_v4i32_f32(i64 %img, float %x) nounwind alwaysinline { entry: @@ -337,6 +337,78 @@ entry: ret <4 x float> %1 } +; <--- TEXTURE FETCHING (integer coordinates) ---> +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4s32.s32(i64, i32) +define <4 x i32> @__clc_llvm_nvvm_tex_1d_v4i32_s32(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4s32.s32(i64 %img, i32 %x); + %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4s32.s32(i64, i32, i32) +define <4 x i32> @__clc_llvm_nvvm_tex_2d_v4i32_s32(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4s32.s32(i64 %img, i32 %x, i32 %y); + %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4s32.s32(i64, i32, i32, i32) +define <4 x i32> @__clc_llvm_nvvm_tex_3d_v4i32_s32(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4s32.s32(i64 %img, i32 %x, i32 %y, i32 %z); + %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4u32.s32(i64, i32) +define <4 x i32> @__clc_llvm_nvvm_tex_1d_v4j32_s32(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4u32.s32(i64 %img, i32 %x); + %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4u32.s32(i64, i32, i32) +define <4 x i32> @__clc_llvm_nvvm_tex_2d_v4j32_s32(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4u32.s32(i64 %img, i32 %x, i32 %y); + %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4u32.s32(i64, i32, i32, i32) +define <4 x i32> @__clc_llvm_nvvm_tex_3d_v4j32_s32(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4u32.s32(i64 %img, i32 %x, i32 %y, i32 %z); + %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {float,float,float,float} @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32) +define <4 x float> @__clc_llvm_nvvm_tex_1d_v4f32_s32(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %x) + %1 = tail call <4 x float>@__clc_structf32_to_vector({float,float,float,float} %0) + ret <4 x float> %1 +} + +declare {float,float,float,float} @llvm.nvvm.tex.unified.2d.v4f32.s32(i64, i32, i32) +define <4 x float> @__clc_llvm_nvvm_tex_2d_v4f32_s32(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %img, i32 %x, i32 %y); + %1 = tail call <4 x float>@__clc_structf32_to_vector({float,float,float,float} %0) + ret <4 x float> %1 +} + +declare {float,float,float,float} @llvm.nvvm.tex.unified.3d.v4f32.s32(i64, i32, i32, i32) +define <4 x float> @__clc_llvm_nvvm_tex_3d_v4f32_s32(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.3d.v4f32.s32(i64 %img, i32 %x, i32 %y, i32 %z); + %1 = tail call <4 x float>@__clc_structf32_to_vector({float,float,float,float} %0) + ret <4 x float> %1 +} ; <--- MIPMAP ---> diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index ff06de04089c1..659385b37e39a 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -61,6 +61,12 @@ def AspectExt_oneapi_interop_semaphore_export : Aspect<"ext_oneapi_interop_semap def AspectExt_oneapi_mipmap : Aspect<"ext_oneapi_mipmap">; def AspectExt_oneapi_mipmap_anisotropy : Aspect<"ext_oneapi_mipmap_anisotropy">; def AspectExt_oneapi_mipmap_level_reference : Aspect<"ext_oneapi_mipmap_level_reference">; +def AspectExt_oneapi_bindless_sampled_image_fetch_1d_usm : Aspect<"ext_oneapi_bindless_sampled_image_fetch_1d_usm">; +def AspectExt_oneapi_bindless_sampled_image_fetch_1d : Aspect<"ext_oneapi_bindless_sampled_image_fetch_1d">; +def AspectExt_oneapi_bindless_sampled_image_fetch_2d_usm : Aspect<"ext_oneapi_bindless_sampled_image_fetch_2d_usm">; +def AspectExt_oneapi_bindless_sampled_image_fetch_2d : Aspect<"ext_oneapi_bindless_sampled_image_fetch_2d">; +def AspectExt_oneapi_bindless_sampled_image_fetch_3d_usm : Aspect<"ext_oneapi_bindless_sampled_image_fetch_3d_usm">; +def AspectExt_oneapi_bindless_sampled_image_fetch_3d : Aspect<"ext_oneapi_bindless_sampled_image_fetch_3d">; def AspectExt_intel_esimd : Aspect<"ext_intel_esimd">; def AspectExt_oneapi_ballot_group : Aspect<"ext_oneapi_ballot_group">; def AspectExt_oneapi_fixed_size_group : Aspect<"ext_oneapi_fixed_size_group">; @@ -114,7 +120,11 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import, AspectExt_oneapi_interop_memory_export, AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_interop_semaphore_export, - AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_intel_esimd, + AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, + AspectExt_oneapi_bindless_sampled_image_fetch_1d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_1d, + AspectExt_oneapi_bindless_sampled_image_fetch_2d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_2d, + AspectExt_oneapi_bindless_sampled_image_fetch_3d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_3d, + AspectExt_intel_esimd, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index d37670db4641c..5c05182228c6e 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -983,8 +983,16 @@ void write_image(unsampled_image_handle &ImageHandle, Inside a kernel, it's possible to read an image via `read_image`, passing the image handle. For the form that takes `unsampled_image_handle`, image data will be fetched exactly as is in device memory. For the form that takes a -`sampled_image_handle`, the image will be sampled according to the +`sampled_image_handle`, the image data will either be sampled or fetched, +depending on the type of `CoordT` passed. If the coordinates are +floating-point, the image will be sampled according to the `bindless_image_sampler` that was passed to the image upon construction. +If the coordinates are integers, the image data will be fetched, and no +sampling operations will be performed, the `bindless_image_sampler` passed to +the image upon creation has no effect when fetching sampled image data with +integer coordinates. Note that not all devices may support fetching of sampled +image data depending on the dimension or backing memory type. We provide +device aspect queries for this in <>. The user is required to pass a `DataT` template parameter, which specifies the return type of the `read_image` function. If `DataT` is not a recognized @@ -1015,9 +1023,12 @@ For reading and writing of unsampled images, coordinates are specified by `int`, `sycl::vec`, and `sycl::vec` for 1D, 2D, and 3D images, respectively. -Sampled image reads take `float`, `sycl::vec`, and +Sampled image "sampled reads" take `float`, `sycl::vec`, and `sycl::vec` coordinate types for 1D, 2D, and 3D images, respectively. +Sampled image "fetch reads" take `int`, `sycl::vec`, and +`sycl::vec` coordinate types for 1D, 2D, and 3D images, respectively. + Note that in the case of 3D reads or writes, coordinates for 3D images take a vector of size 4, not 3, as the fourth element in the coordinate vector is ignored. @@ -1065,6 +1076,36 @@ When providing the above types as `DataT` parameters to an image read function, the corresponding `HintT` parameters to use would be `sycl::vec` and `sycl::vec`, respectively. +=== Querying sampled image fetch support [[querying_sampled_image_fetch_support]] + +We provide the following device queries to query support for sampled image +fetch with various backing memory types and dimensionalities. + +The device aspect descriptors for these queries are: + +[frame="none",options="header"] +|====================== +|Device descriptor | Description +|`aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm` | + Indicates if the device is capable of fetching USM backed 1D + sampled image data. +|`aspect::ext_oneapi_bindless_sampled_image_fetch_1d` | + Indicates if the device is capable of fetching non-USM backed 1D + sampled image data. +|`aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm` | + Indicates if the device is capable of fetching USM backed 2D + sampled image data. +|`aspect::ext_oneapi_bindless_sampled_image_fetch_2d` | + Indicates if the device is capable of fetching non-USM backed 2D + sampled image data. +|`aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm` | + Indicates if the device is capable of fetching USM backed 3D + sampled image data. +|`aspect::ext_oneapi_bindless_sampled_image_fetch_3d` | + Indicates if the device is capable of fetching non-USM backed 3D + sampled image data. +|====================== + == Mipmapped images So far, we have described how to create and operate on standard bindless images. @@ -2060,4 +2101,6 @@ These features still need to be handled: wording around what types are allowed to be read or written. - Allow `read_image` and `read_mipmap` to return a user-defined type. +|5.1|2024-01-16| - Allow fetching of sampled image data by passing integer + coordinates to `read_image`. |====================== diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index ea1a6580d30e6..36a74bc79fdd3 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -199,6 +199,9 @@ extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT); template extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT); +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT); + template extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT, __ocl_sampler_t); diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 60db7ccb645c8..adcedc6722297 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -83,6 +83,18 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) { return sycl::detail::convertDataToType(Ret); } +template +static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { + + // Convert from sycl types to builtin types to get correct function mangling. + using TempRetT = sycl::detail::ConvertToOpenCLType_t; + using TempArgT = sycl::detail::ConvertToOpenCLType_t; + + TempArgT Arg = sycl::detail::convertDataToType(Coords); + TempRetT Ret = __spirv_ImageFetch(Img, Arg); + return sycl::detail::convertDataToType(Ret); +} + template static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, float Level) { diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 4d0da9995908a..339059cbd441f 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -149,9 +149,16 @@ // 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones. // 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM // 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM +// 14.43 Added device queries for sampled image fetch support +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D #define _PI_H_VERSION_MAJOR 14 -#define _PI_H_VERSION_MINOR 42 +#define _PI_H_VERSION_MINOR 43 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -434,6 +441,13 @@ typedef enum { PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT = 0x2010F, PI_EXT_ONEAPI_DEVICE_INFO_MATRIX_COMBINATIONS = 0x20110, + + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM = 0x20111, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D = 0x20112, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM = 0x20113, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D = 0x20114, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM = 0x20115, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D = 0x20116, } _pi_device_info; typedef enum { diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index 3f515ddc564af..1a801deacb3c5 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -273,28 +273,61 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_mipmap_level_reference__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ \ + 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ \ + 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ \ + 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0 +#endif + #ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__ -//__SYCL_ASPECT(ext_intel_esimd, 53) +//__SYCL_ASPECT(ext_intel_esimd, 59) #define __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__ -// __SYCL_ASPECT(ext_oneapi_ballot_group, 54) +// __SYCL_ASPECT(ext_oneapi_ballot_group, 60) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__ -// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55) +// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__ -// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56) +// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__ -// __SYCL_ASPECT(ext_oneapi_tangle_group, 57) +// __SYCL_ASPECT(ext_oneapi_tangle_group, 63) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__ 0 #endif @@ -563,27 +596,57 @@ #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_mipmap_level_reference__ 0 #endif +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__ -//__SYCL_ASPECT(ext_intel_esimd, 53) +//__SYCL_ASPECT(ext_intel_esimd, 59) #define __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__ -// __SYCL_ASPECT(ext_oneapi_ballot_group, 54) +// __SYCL_ASPECT(ext_oneapi_ballot_group, 60) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__ -// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55) +// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__ -// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56) +// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__ -// __SYCL_ASPECT(ext_oneapi_tangle_group, 57) +// __SYCL_ASPECT(ext_oneapi_tangle_group, 63) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__ 0 #endif diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 847f53ea2547f..1351414ec0613 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -733,6 +733,28 @@ template constexpr void assert_unsampled_coords() { } } +template constexpr bool are_floating_coords() { + if constexpr (is_vec_v) { + return std::is_same_v; + } else { + return std::is_same_v; + } +} + +template constexpr bool are_integer_coords() { + if constexpr (is_vec_v) { + return std::is_same_v; + } else { + return std::is_same_v; + } +} + +template constexpr void assert_coords_type() { + static_assert(are_floating_coords() || are_integer_coords(), + "Expected coordinates to be of `float` or `int` type, or " + "vectors of these types."); +} + // assert coords or elements of coords is of a float type template constexpr void assert_sampled_coords() { if constexpr (std::is_scalar_v) { @@ -813,7 +835,8 @@ DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]], * HintT should be a `sycl::vec` type, `sycl::half` type, or POD type. * HintT must also have the same size as DataT. * @tparam CoordT The input coordinate type. e.g. float, float2, or float4 for - * 1D, 2D, and 3D, respectively + * 1D, 2D, and 3D sampling, respectively. And int, int2, or int4 for + * 1D, 2D, and 3D fetching, respectively. * @param imageHandle The image handle * @param coords The coordinates at which to fetch image data * @return Sampled image data @@ -827,23 +850,32 @@ DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]], template DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]]) { - detail::assert_sampled_coords(); + detail::assert_coords_type(); constexpr size_t coordSize = detail::coord_size(); static_assert(coordSize == 1 || coordSize == 2 || coordSize == 4, "Expected input coordinate to be have 1, 2, or 4 components " "for 1D, 2D and 3D images, respectively."); + static_assert(sizeof(HintT) == sizeof(DataT), + "When trying to read a user-defined type, HintT must be of " + "the same size as the user-defined DataT."); + static_assert(detail::is_recognized_standard_type(), + "HintT must always be a recognized standard type"); #ifdef __SYCL_DEVICE_ONLY__ - if constexpr (detail::is_recognized_standard_type()) { - return __invoke__ImageRead(imageHandle.raw_handle, coords); + if constexpr (detail::are_floating_coords()) { + if constexpr (detail::is_recognized_standard_type()) { + return __invoke__ImageRead(imageHandle.raw_handle, coords); + } else { + return sycl::bit_cast( + __invoke__ImageRead(imageHandle.raw_handle, coords)); + } } else { - static_assert(sizeof(HintT) == sizeof(DataT), - "When trying to read a user-defined type, HintT must be of " - "the same size as the user-defined DataT."); - static_assert(detail::is_recognized_standard_type(), - "HintT must always be a recognized standard type"); - return sycl::bit_cast( - __invoke__ImageRead(imageHandle.raw_handle, coords)); + if constexpr (detail::is_recognized_standard_type()) { + return __invoke__ImageFetch(imageHandle.raw_handle, coords); + } else { + return sycl::bit_cast( + __invoke__ImageFetch(imageHandle.raw_handle, coords)); + } } #else assert(false); // Bindless images not yet implemented on host. diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 31ccd0d038c4a..b2c8607101524 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -47,8 +47,14 @@ __SYCL_ASPECT(ext_oneapi_interop_semaphore_export, 49) __SYCL_ASPECT(ext_oneapi_mipmap, 50) __SYCL_ASPECT(ext_oneapi_mipmap_anisotropy, 51) __SYCL_ASPECT(ext_oneapi_mipmap_level_reference, 52) -__SYCL_ASPECT(ext_intel_esimd, 53) -__SYCL_ASPECT(ext_oneapi_ballot_group, 54) -__SYCL_ASPECT(ext_oneapi_fixed_size_group, 55) -__SYCL_ASPECT(ext_oneapi_opportunistic_group, 56) -__SYCL_ASPECT(ext_oneapi_tangle_group, 57) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58) +__SYCL_ASPECT(ext_intel_esimd, 59) +__SYCL_ASPECT(ext_oneapi_ballot_group, 60) +__SYCL_ASPECT(ext_oneapi_fixed_size_group, 61) +__SYCL_ASPECT(ext_oneapi_opportunistic_group, 62) +__SYCL_ASPECT(ext_oneapi_tangle_group, 63) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 8d7f2f32b4158..7a6e9ac2c67eb 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,14 +56,20 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 79c28d0f0713f58358d5080653d95803fd131749 - # Merge: 25e0b603 45d76b78 - # Author: aarongreig - # Date: Fri Jan 12 16:14:44 2024 +0000 - # Merge pull request #1186 from hdelan/device-global-hip - # [HIP] Add support for global variable read write - set(UNIFIED_RUNTIME_TAG 79c28d0f0713f58358d5080653d95803fd131749) + set(UNIFIED_RUNTIME_REPO "git@github.com:przemektmalon/unified-runtime.git") + # commit 16c6dc262451cf1b7dcb7d8021904a968c4a16ee + # Author: Przemek Malon + # Date: Wed Nov 29 11:25:34 2023 +0000 + # [Bindless][Exp] Add device queries for sampled image fetch + # Added the following queries for device capabilities of fetching sampled + # images: + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D + set(UNIFIED_RUNTIME_TAG aa442a391c79692af279aaeda74da63c9ba6a489) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 7cdfc8ad7d30a..35a641a527d42 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1256,6 +1256,24 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT, UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT, UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 5d5b98e4cf02b..b0e9b9fe05778 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -546,6 +546,57 @@ bool device_impl::has(aspect Aspect) const { sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; return call_successful && support; } + case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } + case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } + case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } + case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } + case aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } + case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } case aspect::ext_intel_esimd: { pi_bool support = PI_FALSE; bool call_successful = diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp index 931332b928cb4..353c4fe5c0472 100644 --- a/sycl/test-e2e/bindless_images/image_get_info.cpp +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -55,8 +55,31 @@ int main() { std::cout << "bindless_images_support: " << bindlessSupport << "\nbindless_images_shared_usm_support: " << bindlessSharedUsmSupport - << "\nbindless_images_1d_usm_support: " 1dS - << "\nbindless_images_2d_usm_support: " << S << "\n"; + << "\nbindless_images_1d_usm_support: " << usm1dSupport + << "\nbindless_images_2d_usm_support: " << usm2dSupport << "\n"; +#endif + + // Extension: query for sampled image fetch capabilities + bool sampledFetch1DUSMSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm); + bool sampledFetch2DUSMSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm); + bool sampledFetch3DUSMSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm); + bool sampledFetch1DSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d); + bool sampledFetch2DSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d); + bool sampledFetch3DSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d); + +#ifdef VERBOSE_PRINT + std::cout << "sampledFetch1DUSMSupport: " << sampledFetch1DUSMSupport + << "\nsampledFetch2DUSMSupport: " << sampledFetch2DUSMSupport + << "\nsampledFetch3DUSMSupport: " << sampledFetch3DUSMSupport + << "\nsampledFetch1DSupport: " << sampledFetch1DSupport + << "\nsampledFetch2DSupport: " << sampledFetch2DSupport + << "\nsampledFetch3DSupport: " << sampledFetch3DSupport << "\n"; #endif // Extension: get pitch alignment information from device -- device info diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp new file mode 100644 index 0000000000000..bacafd92fcf21 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp @@ -0,0 +1,104 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class kernel_sampled_fetch; + +int main() { + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Check if device supports 1D USM sampled image fetches + if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fetching 1D " + "USM backed sampled images\n"; +#endif + return 0; + } + + // declare image data + constexpr size_t width = 16; + std::vector out(width); + std::vector expected(width); + std::vector dataIn(width); + auto imgMem = sycl::malloc_shared(width, q); + for (int i = 0; i < width; i++) { + expected[i] = i; + imgMem[i] = i; + } + + namespace syclexp = sycl::ext::oneapi::experimental; + + try { + // Extension: image descriptor + syclexp::image_descriptor desc({width}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + auto imgHandle = syclexp::create_image(imgMem, 0, samp, desc, q); + + sycl::buffer buf(out.data(), sycl::range{width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, width); + + cgh.parallel_for(width, [=](sycl::id<1> id) { + // Extension: fetch data from sampled image handle + float px1 = syclexp::read_image(imgHandle, int(id[0])); + + outAcc[id] = px1; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + syclexp::destroy_image_handle(imgHandle, dev, ctxt); + sycl::free(imgMem, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < width; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp new file mode 100644 index 0000000000000..0c5108f5b03f4 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp @@ -0,0 +1,125 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class kernel_sampled_fetch; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Check if device supports 2D non-USM sampled image fetches + if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fetching 2D " + "non-USM backed sampled images\n"; +#endif + return 0; + } + + // declare image data + constexpr size_t width = 5; + constexpr size_t height = 6; + constexpr size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + auto index = i + (width * j); + expected[index] = index; + dataIn[index] = index; + } + } + + namespace syclexp = sycl::ext::oneapi::experimental; + + try { + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::unnormalized, + sycl::filtering_mode::nearest); + + // Extension: image descriptor + syclexp::image_descriptor desc({width, height}, + sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device + syclexp::image_mem imgMem(desc, dev, ctxt); + + // Extension: copy over data to device for non-USM image + q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the images and return the handles + syclexp::sampled_image_handle imgHandle = + syclexp::create_image(imgMem, samp, desc, q); + + sycl::buffer buf(out.data(), sycl::range{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Extension: fetch data from sampled image handle + float px1 = + syclexp::read_image(imgHandle, sycl::int2(dim0, dim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px1; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + syclexp::destroy_image_handle(imgHandle, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp new file mode 100644 index 0000000000000..8932560b95e91 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp @@ -0,0 +1,132 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class kernel_sampled_fetch; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Check if device supports 2D USM sampled image fetches + if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fetching 2D " + "USM backed sampled images\n"; +#endif + return 0; + } + + // declare image data + constexpr size_t width = 5; + constexpr size_t height = 6; + constexpr size_t N = width * height; + std::vector> out(N); + std::vector> expected(N); + std::vector> dataIn(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + auto index = i + (width * j); + expected[index] = {index, index, index, index}; + dataIn[index] = {index, index, index, index}; + } + } + + namespace syclexp = sycl::ext::oneapi::experimental; + + try { + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: image descriptor + syclexp::image_descriptor desc({width, height}, + sycl::image_channel_order::rgba, + sycl::image_channel_type::unsigned_int16); + size_t pitch = 0; + + // Extension: returns the device pointer to USM allocated pitched memory + auto imgMem = syclexp::pitched_alloc_device(&pitch, desc, q); + + if (imgMem == nullptr) { + std::cout << "Error allocating pitched image memory!" << std::endl; + return 1; + } + + // Extension: copy over data to device for USM image + q.ext_oneapi_copy(dataIn.data(), imgMem, desc, pitch); + q.wait_and_throw(); + + // Extension: create the images and return the handles + syclexp::sampled_image_handle imgHandle = + syclexp::create_image(imgMem, pitch, samp, desc, q); + + sycl::buffer buf(out.data(), sycl::range{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Extension: fetch data from sampled image handle + auto px1 = syclexp::read_image>( + imgHandle, sycl::int2(dim0, dim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px1; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + syclexp::destroy_image_handle(imgHandle, dev, ctxt); + sycl::free(imgMem, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i][0] != expected[i][0]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i][0] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp new file mode 100644 index 0000000000000..823eb8b7cc9b1 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -0,0 +1,129 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class kernel_sampled_fetch; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Check if device supports 3D non-USM sampled image fetches + if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fetching 3D " + "non-USM backed sampled images\n"; +#endif + return 0; + } + + // declare image data + constexpr size_t width = 4; + constexpr size_t height = 6; + constexpr size_t depth = 8; + constexpr size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + auto index = i + width * (j + height * k); + expected[index] = index; + dataIn[index] = index; + } + } + } + + namespace syclexp = sycl::ext::oneapi::experimental; + + try { + // Extension: image descriptor + syclexp::image_descriptor desc({width, height, depth}, + sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::none, + sycl::coordinate_normalization_mode::unnormalized, + sycl::filtering_mode::nearest); + + // Extension: allocate memory on device + syclexp::image_mem imgMem(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + syclexp::sampled_image_handle imgHandle = + syclexp::create_image(imgMem, samp, desc, dev, ctxt); + + sycl::buffer buf(out.data(), sycl::range{depth, height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<3>{depth, height, width}); + + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {width, height, depth}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + size_t dim2 = it.get_local_id(2); + + // Extension: fetch data from sampled image handle + float px1 = syclexp::read_image( + imgHandle, sycl::vec(dim0, dim1, dim2, 0)); + + outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + syclexp::destroy_image_handle(imgHandle, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampling_1D.cpp b/sycl/test-e2e/bindless_images/sampling_1D.cpp index 554ba98e13333..b363fd77ec604 100644 --- a/sycl/test-e2e/bindless_images/sampling_1D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_1D.cpp @@ -112,6 +112,6 @@ int main() { return 0; } - std::cout << "Test passed!" << std::endl; + std::cout << "Test failed!" << std::endl; return 3; } From 8c1e5427bd0f8d288922c5c50d0ce5cc43b3dc25 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Fri, 19 Jan 2024 14:23:34 +0000 Subject: [PATCH 02/34] Change UR repo link in CMakeLists to HTTPS --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 7a6e9ac2c67eb..1b7c3ca9385b7 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,7 +56,7 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "git@github.com:przemektmalon/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") # commit 16c6dc262451cf1b7dcb7d8021904a968c4a16ee # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 From 992d8890ff0ccf10e0d67025c2fa2d5c7038214d Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Mon, 22 Jan 2024 09:54:24 +0000 Subject: [PATCH 03/34] Update UR TAG in CMakeLists --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 85e7f3aa0296c..9d93e72862af7 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 16c6dc262451cf1b7dcb7d8021904a968c4a16ee + # commit 9713f7d8469600dcc4b220b4f1d1f01ea909068d # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D - set(UNIFIED_RUNTIME_TAG aa442a391c79692af279aaeda74da63c9ba6a489) + set(UNIFIED_RUNTIME_TAG 9713f7d8469600dcc4b220b4f1d1f01ea909068d) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 25752ac4c2862b570e0553934158a3b17a9d7c52 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 23 Jan 2024 13:07:59 +0000 Subject: [PATCH 04/34] Update UR enums with _EXP suffix --- sycl/plugins/unified_runtime/CMakeLists.txt | 16 ++++++++-------- sycl/plugins/unified_runtime/pi2ur.hpp | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 9d93e72862af7..4095e698afa40 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,19 +57,19 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 9713f7d8469600dcc4b220b4f1d1f01ea909068d + # commit c9f5374d9e991068431d20456e214f595d7920a5 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch # Added the following queries for device capabilities of fetching sampled # images: - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D - set(UNIFIED_RUNTIME_TAG 9713f7d8469600dcc4b220b4f1d1f01ea909068d) + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP + set(UNIFIED_RUNTIME_TAG c9f5374d9e991068431d20456e214f595d7920a5) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 35a641a527d42..a603e96c32f5c 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1258,22 +1258,22 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM) + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D) + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM) + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D) + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM) + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D) + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT, UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP) From 9439bdd3524277946de080c9b359935810e77796 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 6 Feb 2024 09:16:02 +0000 Subject: [PATCH 05/34] Update UR TAG in CMakeLists --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 4095e698afa40..527b129d5db39 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit c9f5374d9e991068431d20456e214f595d7920a5 + # commit 6ddda43379f9c226a5cf04f41dfbb923c5ff831f # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG c9f5374d9e991068431d20456e214f595d7920a5) + set(UNIFIED_RUNTIME_TAG 6ddda43379f9c226a5cf04f41dfbb923c5ff831f) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 73614cbdd0a0d2f1da389f958ee69ed44a772c66 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 6 Feb 2024 10:45:04 +0000 Subject: [PATCH 06/34] Update CMakeLists UR TAG --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index d3d275d461fe4..db0c01f3067f0 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 6ddda43379f9c226a5cf04f41dfbb923c5ff831f + # commit 098b31066bb30b5f5978d3e95e4e4070ef11bfe4 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 6ddda43379f9c226a5cf04f41dfbb923c5ff831f) + set(UNIFIED_RUNTIME_TAG 098b31066bb30b5f5978d3e95e4e4070ef11bfe4) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 17ba91f2619b06fd748f112d59d6574aa3298952 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Mon, 12 Feb 2024 16:34:16 +0000 Subject: [PATCH 07/34] Apply libclc macro changes to reduce LOC --- libclc/ptx-nvidiacl/libspirv/images/image.cl | 108 +++++++----------- .../sampled_fetch/fetch_3D.cpp | 4 +- 2 files changed, 44 insertions(+), 68 deletions(-) diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl index 67553b03c2f84..20135c3ea7e70 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image.cl +++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl @@ -2338,72 +2338,44 @@ float4 __nvvm_tex_3d_v4f32_i32(unsigned long, int, int, #define COORD_PARAMS_2D x, y #define COORD_PARAMS_3D x, y, z -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v4i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v4i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v4i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v4t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v4t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v4t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v4i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v4i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v4i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v4h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v4h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v4h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v4f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v4f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v4f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) - -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, v2f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, v2f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, v2f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, v2i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, v2i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, v2i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, v2j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, v2j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, v2j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v2i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v2i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v2i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v2t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v2t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v2t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v2i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v2i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v2i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v2h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v2h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v2h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v2f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v2f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v2f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) - -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D) -_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D) - +#define _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(elem_t, fetch_elem_t, vec_size, fetch_vec_size) \ +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 1, vec_size, fetch_vec_size, COORD_INPUT_1D, COORD_PARAMS_1D) \ +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 2, vec_size, fetch_vec_size, COORD_INPUT_2D, COORD_PARAMS_2D) \ +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 3, vec_size, fetch_vec_size, COORD_INPUT_3D, COORD_PARAMS_3D) + +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(short, int, v4i16, v4i32) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(ushort, uint, v4t16, v4j32) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(char, int, v4i8, v4i32) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uchar, uint, v4h8, v4j32) +_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, v4f16, v4f32) + +#define _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(elem_t, fetch_elem_t, vec_size, fetch_vec_size) \ +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 1, vec_size, fetch_vec_size, COORD_INPUT_1D, COORD_PARAMS_1D) \ +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 2, vec_size, fetch_vec_size, COORD_INPUT_2D, COORD_PARAMS_2D) \ +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 3, vec_size, fetch_vec_size, COORD_INPUT_3D, COORD_PARAMS_3D) + +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(float, float, v2f32, v4f32) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(int, int, v2i32, v4i32) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uint, uint, v2j32, v4j32) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(short, int, v2i16, v4i32) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(ushort, uint, v2t16, v4j32) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(char, int, v2i8, v4i32) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uchar, uint, v2h8, v4j32) +_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, v2f16, v4f32) + +#define _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(elem_t, fetch_elem_t, vec_size, fetch_vec_size) \ +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 1, vec_size, fetch_vec_size, COORD_INPUT_1D, COORD_PARAMS_1D) \ +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 2, vec_size, fetch_vec_size, COORD_INPUT_2D, COORD_PARAMS_2D) \ +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 3, vec_size, fetch_vec_size, COORD_INPUT_3D, COORD_PARAMS_3D) + +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(float, float, f32, v4f32) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(int, int, i32, v4i32) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uint, uint, j32, v4j32) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(short, int, i16, v4i32) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(ushort, uint, t16, v4j32) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(char, int, i8, v4i32) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uchar, uint, h8, v4j32) +_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, f16, v4f32) #undef COORD_INPUT_1D #undef COORD_INPUT_2D @@ -2417,6 +2389,10 @@ _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, f16, v4f32, COO #undef _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN #undef _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN +#undef _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS +#undef _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS +#undef _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS + #define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN( \ elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input, \ ...) \ diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp index 823eb8b7cc9b1..185d2b31b1515 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -8,7 +8,7 @@ #include // Uncomment to print additional test information -// #define VERBOSE_PRINT +#define VERBOSE_PRINT class kernel_sampled_fetch; @@ -54,7 +54,7 @@ int main() { sycl::image_channel_type::fp32); syclexp::bindless_image_sampler samp( - sycl::addressing_mode::none, + sycl::addressing_mode::repeat, sycl::coordinate_normalization_mode::unnormalized, sycl::filtering_mode::nearest); From a38a8a9ccda4a60ef3e701c005555db31432fc56 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Mon, 12 Feb 2024 16:45:35 +0000 Subject: [PATCH 08/34] Update UR repo TAG --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index db0c01f3067f0..a0e4f2cb66d4b 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 098b31066bb30b5f5978d3e95e4e4070ef11bfe4 + # commit 18f3039ce5ba360a3ed00e614203bb4eda960e06 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 098b31066bb30b5f5978d3e95e4e4070ef11bfe4) + set(UNIFIED_RUNTIME_TAG 18f3039ce5ba360a3ed00e614203bb4eda960e06) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp index 185d2b31b1515..2cec4ab976a98 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -8,7 +8,7 @@ #include // Uncomment to print additional test information -#define VERBOSE_PRINT +// #define VERBOSE_PRINT class kernel_sampled_fetch; From 73c665a474bdf5efbbcd96e740a0dd43c8f0a5a8 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 22 Feb 2024 13:20:25 +0000 Subject: [PATCH 09/34] Fix deprecated function calls. Update UR tag & fetch tests --- sycl/include/sycl/detail/image_ocl_types.hpp | 19 +- .../sycl/ext/oneapi/bindless_images.hpp | 182 +++++++++--------- sycl/plugins/unified_runtime/CMakeLists.txt | 4 +- .../sampled_fetch/fetch_1D_USM.cpp | 2 +- .../sampled_fetch/fetch_2D.cpp | 2 +- .../sampled_fetch/fetch_2D_USM.cpp | 2 +- .../sampled_fetch/fetch_3D.cpp | 4 +- 7 files changed, 116 insertions(+), 99 deletions(-) diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 96b53b04f782c..2a10a722c565a 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -84,13 +84,24 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { // Convert from sycl types to builtin types to get correct function mangling. using TempRetT = sycl::detail::ConvertToOpenCLType_t; - using TempArgT = sycl::detail::ConvertToOpenCLType_t; + auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - TempArgT Arg = sycl::detail::convertDataToType(Coords); - TempRetT Ret = __spirv_ImageFetch(Img, Arg); - return sycl::detail::convertDataToType(Ret); + return sycl::detail::convertFromOpenCLTypeFor( + __spirv_ImageFetch(Img, TmpCoords)); } +// template +// static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { + +// // Convert from sycl types to builtin types to get correct function mangling. +// using TempRetT = sycl::detail::ConvertToOpenCLType_t; +// using TempArgT = sycl::detail::ConvertToOpenCLType_t; + +// TempArgT Arg = sycl::detail::convertDataToType(Coords); +// TempRetT Ret = __spirv_ImageFetch(Img, Arg); +// return sycl::detail::convertDataToType(Ret); +// } + template static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, float Level) { diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 18e6ffeac317c..b859e8a38c89e 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -792,34 +792,6 @@ template constexpr bool is_recognized_standard_type() { } // namespace detail -/** - * @brief [Deprecated] Read an unsampled image using its handle - * - * @tparam DataT The return type - * @tparam HintT A hint type that can be used to select for a specialized - * backend intrinsic when a user-defined type is passed as `DataT`. - * HintT should be a `sycl::vec` type, `sycl::half` type, or POD type. - * HintT must also have the same size as DataT. - * @tparam CoordT The input coordinate type. e.g. int, int2, or int3 for - * 1D, 2D, and 3D, respectively - * @param imageHandle The image handle - * @param coords The coordinates at which to fetch image data - * @return Image data - * - * __NVPTX__: Name mangling info - * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) - * The name mangling should therefore not interfere with one - * another - */ -template -__SYCL_DEPRECATED("read_image for standard unsampled images is deprecated. " - "Instead use fetch_image.") -DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]], - const CoordT &coords [[maybe_unused]]) { - return fetch_image(imageHandle, coords); -} - /** * @brief Fetch data from an unsampled image using its handle * @@ -867,18 +839,18 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]], } /** - * @brief [Deprecated] Read a sampled image using its handle + * @brief [Deprecated] Read an unsampled image using its handle * * @tparam DataT The return type * @tparam HintT A hint type that can be used to select for a specialized * backend intrinsic when a user-defined type is passed as `DataT`. * HintT should be a `sycl::vec` type, `sycl::half` type, or POD type. * HintT must also have the same size as DataT. - * @tparam CoordT The input coordinate type. e.g. float, float2, or float3 for + * @tparam CoordT The input coordinate type. e.g. int, int2, or int3 for * 1D, 2D, and 3D, respectively * @param imageHandle The image handle - * @param coords The coordinates at which to sample image data - * @return Sampled image data + * @param coords The coordinates at which to fetch image data + * @return Image data * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) @@ -887,11 +859,11 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]], * another */ template -__SYCL_DEPRECATED("read_image for standard sampled images is deprecated. " - "Instead use sample_image or fetch_image.") -DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], +__SYCL_DEPRECATED("read_image for standard unsampled images is deprecated. " + "Instead use fetch_image.") +DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]]) { - return sample_image(imageHandle, coords); + return fetch_image(imageHandle, coords); } /** @@ -930,12 +902,12 @@ DataT fetch_image(const sampled_image_handle &imageHandle [[maybe_unused]], "HintT must always be a recognized standard type"); #ifdef __SYCL_DEVICE_ONLY__ - if constexpr (detail::is_recognized_standard_type()) { - return __invoke__ImageFetch(imageHandle.raw_handle, coords); - } else { - return sycl::bit_cast( - __invoke__ImageFetch(imageHandle.raw_handle, coords)); - } + if constexpr (detail::is_recognized_standard_type()) { + return __invoke__ImageRead(imageHandle.raw_handle, coords); + } else { + return sycl::bit_cast( + __invoke__ImageRead(imageHandle.raw_handle, coords)); + } #else assert(false); // Bindless images not yet implemented on host. #endif @@ -977,20 +949,19 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]], "HintT must always be a recognized standard type"); #ifdef __SYCL_DEVICE_ONLY__ - if constexpr (detail::is_recognized_standard_type()) { - return __invoke__ImageRead(imageHandle.raw_handle, coords); - } else { - return sycl::bit_cast( - __invoke__ImageRead(imageHandle.raw_handle, coords)); - } + if constexpr (detail::is_recognized_standard_type()) { + return __invoke__ImageRead(imageHandle.raw_handle, coords); + } else { + return sycl::bit_cast( + __invoke__ImageRead(imageHandle.raw_handle, coords)); + } #else assert(false); // Bindless images not yet implemented on host. #endif } /** - * @brief [Deprecated] Read a mipmap image using its handle with LOD - * filtering + * @brief [Deprecated] Read a sampled image using its handle * * @tparam DataT The return type * @tparam HintT A hint type that can be used to select for a specialized @@ -999,45 +970,28 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]], * HintT must also have the same size as DataT. * @tparam CoordT The input coordinate type. e.g. float, float2, or float3 for * 1D, 2D, and 3D, respectively - * @param imageHandle The mipmap image handle - * @param coords The coordinates at which to sample mipmap image data - * @param level The mipmap level at which to sample - * @return Mipmap image data with LOD filtering - */ -template -__SYCL_DEPRECATED("read_mipmap has been deprecated. " - "Instead use sample_mipmap.") -DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]], - const CoordT &coords [[maybe_unused]], - const float level [[maybe_unused]]) { - return sample_mipmap(imageHandle, coords, level); -} - -/** - * @brief [Deprecated] Read a mipmap image using its handle with anisotropic - * filtering + * @param imageHandle The image handle + * @param coords The coordinates at which to sample image data + * @return Sampled image data * - * @tparam DataT The return type - * @tparam HintT A hint type that can be used to select for a specialized - * backend intrinsic when a user-defined type is passed as `DataT`. - * HintT should be a `sycl::vec` type, `sycl::half` type, or POD type. - * HintT must also have the same size as DataT. - * @tparam CoordT The input coordinate type. e.g. float, float2, or float3 for - * 1D, 2D, and 3D, respectively - * @param imageHandle The mipmap image handle - * @param coords The coordinates at which to sample mipmap image data - * @param dX Screen space gradient in the x dimension - * @param dY Screen space gradient in the y dimension - * @return Mipmap image data with anisotropic filtering + * __NVPTX__: Name mangling info + * Cuda surfaces require integer coords (by bytes) + * Cuda textures require float coords (by element or normalized) + * The name mangling should therefore not interfere with one + * another */ template -__SYCL_DEPRECATED("read_mipmap has been deprecated. " - "Instead use sample_mipmap.") -DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]], - const CoordT &coords [[maybe_unused]], - const CoordT &dX [[maybe_unused]], - const CoordT &dY [[maybe_unused]]) { - return sample_mipmap(imageHandle, coords, dX, dY); +__SYCL_DEPRECATED("read_image for standard sampled images is deprecated. " + "Instead use sample_image with floating point coordinates or " + "fetch_image with integer coordinates.") +DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], + const CoordT &coords [[maybe_unused]]) { + detail::assert_coords_type(); + if constexpr (detail::are_floating_coords()) { + return sample_image(imageHandle, coords); + } else if constexpr (detail::are_integer_coords()) { + return fetch_image(imageHandle, coords); + } } /** @@ -1127,6 +1081,58 @@ DataT sample_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]], #endif } +/** + * @brief [Deprecated] Read a mipmap image using its handle with LOD + * filtering + * + * @tparam DataT The return type + * @tparam HintT A hint type that can be used to select for a specialized + * backend intrinsic when a user-defined type is passed as `DataT`. + * HintT should be a `sycl::vec` type, `sycl::half` type, or POD type. + * HintT must also have the same size as DataT. + * @tparam CoordT The input coordinate type. e.g. float, float2, or float3 for + * 1D, 2D, and 3D, respectively + * @param imageHandle The mipmap image handle + * @param coords The coordinates at which to sample mipmap image data + * @param level The mipmap level at which to sample + * @return Mipmap image data with LOD filtering + */ +template +__SYCL_DEPRECATED("read_mipmap has been deprecated. " + "Instead use sample_mipmap.") +DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]], + const CoordT &coords [[maybe_unused]], + const float level [[maybe_unused]]) { + return sample_mipmap(imageHandle, coords, level); +} + +/** + * @brief [Deprecated] Read a mipmap image using its handle with anisotropic + * filtering + * + * @tparam DataT The return type + * @tparam HintT A hint type that can be used to select for a specialized + * backend intrinsic when a user-defined type is passed as `DataT`. + * HintT should be a `sycl::vec` type, `sycl::half` type, or POD type. + * HintT must also have the same size as DataT. + * @tparam CoordT The input coordinate type. e.g. float, float2, or float3 for + * 1D, 2D, and 3D, respectively + * @param imageHandle The mipmap image handle + * @param coords The coordinates at which to sample mipmap image data + * @param dX Screen space gradient in the x dimension + * @param dY Screen space gradient in the y dimension + * @return Mipmap image data with anisotropic filtering + */ +template +__SYCL_DEPRECATED("read_mipmap has been deprecated. " + "Instead use sample_mipmap.") +DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]], + const CoordT &coords [[maybe_unused]], + const CoordT &dX [[maybe_unused]], + const CoordT &dY [[maybe_unused]]) { + return sample_mipmap(imageHandle, coords, dX, dY); +} + /** * @brief [Deprecated] Read a mipmap image using its handle with LOD * filtering @@ -1149,7 +1155,7 @@ __SYCL_DEPRECATED("read_image for mipmaps is deprecated. " DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]], const float level [[maybe_unused]]) { - return sample_mipmap(imageHandle, coords, level); + return sample_mipmap(imageHandle, coords, level); } /** @@ -1176,7 +1182,7 @@ DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]], const CoordT &dX [[maybe_unused]], const CoordT &dY [[maybe_unused]]) { - return sample_mipmap(imageHandle, coords, dX, dY); + return sample_mipmap(imageHandle, coords, dX, dY); } /** diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index c6f82486de2eb..4c88b3a971704 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 18f3039ce5ba360a3ed00e614203bb4eda960e06 + # commit 747dad97f8da54c9c2c0bd255c20d6329a7e996f # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 18f3039ce5ba360a3ed00e614203bb4eda960e06) + set(UNIFIED_RUNTIME_TAG 747dad97f8da54c9c2c0bd255c20d6329a7e996f) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp index bacafd92fcf21..557cbc7e9f243 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp @@ -57,7 +57,7 @@ int main() { cgh.parallel_for(width, [=](sycl::id<1> id) { // Extension: fetch data from sampled image handle - float px1 = syclexp::read_image(imgHandle, int(id[0])); + float px1 = syclexp::fetch_image(imgHandle, int(id[0])); outAcc[id] = px1; }); diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp index 0c5108f5b03f4..3af92e0c00935 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp @@ -79,7 +79,7 @@ int main() { // Extension: fetch data from sampled image handle float px1 = - syclexp::read_image(imgHandle, sycl::int2(dim0, dim1)); + syclexp::fetch_image(imgHandle, sycl::int2(dim0, dim1)); outAcc[sycl::id<2>{dim1, dim0}] = px1; }); diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp index 8932560b95e91..12608e3908b0a 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp @@ -84,7 +84,7 @@ int main() { size_t dim1 = it.get_local_id(1); // Extension: fetch data from sampled image handle - auto px1 = syclexp::read_image>( + auto px1 = syclexp::fetch_image>( imgHandle, sycl::int2(dim0, dim1)); outAcc[sycl::id<2>{dim1, dim0}] = px1; diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp index 2cec4ab976a98..12227c05407e4 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -82,8 +82,8 @@ int main() { size_t dim2 = it.get_local_id(2); // Extension: fetch data from sampled image handle - float px1 = syclexp::read_image( - imgHandle, sycl::vec(dim0, dim1, dim2, 0)); + float px1 = syclexp::fetch_image( + imgHandle, sycl::vec(dim0, dim1, dim2)); outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1; }); From e1e22ba54dedb4351ca366ff4994795beb8d816f Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 22 Feb 2024 14:59:49 +0000 Subject: [PATCH 10/34] Make fetch operation naming consistent across the stack (API -> libclc) --- libclc/ptx-nvidiacl/libspirv/images/image.cl | 206 +++++++++--------- sycl/include/CL/__spirv/spirv_ops.hpp | 3 + sycl/include/sycl/detail/image_ocl_types.hpp | 21 +- .../sycl/ext/oneapi/bindless_images.hpp | 17 +- 4 files changed, 127 insertions(+), 120 deletions(-) diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl index 0fbb2ef686bf5..feb7671813d80 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image.cl +++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl @@ -1637,11 +1637,11 @@ void __nvvm_sust_3d_v4f16_clamp_s(unsigned long imageHandle, int x, int y, as_short(b), as_short(c), as_short(d)); } -#define _CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN( \ +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN( \ elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input, \ ...) \ _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE( \ - 17, __spirv_ImageRead, I##elem_t_mangled, \ + 18, __spirv_ImageFetch, I##elem_t_mangled, \ coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_input) { \ return __nvvm_suld_##dimension##d_##vec_size##_clamp_s(imageHandle, \ __VA_ARGS__); \ @@ -1656,94 +1656,94 @@ void __nvvm_sust_3d_v4f16_clamp_s(unsigned long imageHandle, int x, int y, __nvvm_sust_##dimension##d_##vec_size##_clamp_s(imageHandle, __VA_ARGS__); \ } -// READS +// Fetching unsampled images // Int -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int, 1, i, i32, i, int x, x * sizeof(int)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x * sizeof(int), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int, 3, i, i32, Dv3_i, int3 coord, coord.x * sizeof(int), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x * sizeof(int2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int2, 2, Dv2_i, v2i32, S0_, int2 coord, coord.x * sizeof(int2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int2, 3, Dv2_i, v2i32, Dv3_i, int3 coord, coord.x * sizeof(int2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x * sizeof(int4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x * sizeof(int4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int4, 3, Dv4_i, v4i32, Dv3_i, int3 coord, coord.x * sizeof(int4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int, 1, i, i32, i, int x, x * sizeof(int)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x * sizeof(int), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv3_i, int3 coord, coord.x * sizeof(int), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x * sizeof(int2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, S0_, int2 coord, coord.x * sizeof(int2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv3_i, int3 coord, coord.x * sizeof(int2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x * sizeof(int4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x * sizeof(int4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv3_i, int3 coord, coord.x * sizeof(int4), coord.y, coord.z) // Unsigned Int -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(unsigned int, 1, j, j32, i, int x, x * sizeof(unsigned int)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(unsigned int, 2, j, j32, Dv2_i, int2 coord, coord.x * sizeof(unsigned int), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(unsigned int, 3, j, j32, Dv3_i, int3 coord, coord.x * sizeof(unsigned int), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x * sizeof(uint2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x * sizeof(uint2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv3_i, int3 coord, coord.x * sizeof(uint2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x * sizeof(uint4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x * sizeof(uint4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv3_i, int3 coord, coord.x * sizeof(uint4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(unsigned int, 1, j, j32, i, int x, x * sizeof(unsigned int)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(unsigned int, 2, j, j32, Dv2_i, int2 coord, coord.x * sizeof(unsigned int), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(unsigned int, 3, j, j32, Dv3_i, int3 coord, coord.x * sizeof(unsigned int), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x * sizeof(uint2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x * sizeof(uint2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv3_i, int3 coord, coord.x * sizeof(uint2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x * sizeof(uint4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x * sizeof(uint4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv3_i, int3 coord, coord.x * sizeof(uint4), coord.y, coord.z) // Short -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short, 1, s, i16, i, int x, x * sizeof(short)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x * sizeof(short), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short, 3, s, i16, Dv3_i, int3 coord, coord.x * sizeof(short), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x * sizeof(short2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x * sizeof(short2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short2, 3, Dv2_s, v2i16, Dv3_i, int3 coord, coord.x * sizeof(short2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x * sizeof(short4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x * sizeof(short4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short4, 3, Dv4_s, v4i16, Dv3_i, int3 coord, coord.x * sizeof(short4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short, 1, s, i16, i, int x, x * sizeof(short)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x * sizeof(short), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv3_i, int3 coord, coord.x * sizeof(short), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x * sizeof(short2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x * sizeof(short2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv3_i, int3 coord, coord.x * sizeof(short2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x * sizeof(short4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x * sizeof(short4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv3_i, int3 coord, coord.x * sizeof(short4), coord.y, coord.z) // Unsigned Short -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort, 1, t, t16, i, int x, x * sizeof(ushort)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x * sizeof(ushort), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort, 3, t, t16, Dv3_i, int3 coord, coord.x * sizeof(ushort), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x * sizeof(ushort2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x * sizeof(ushort2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv3_i, int3 coord, coord.x * sizeof(ushort2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x * sizeof(ushort4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x * sizeof(ushort4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv3_i, int3 coord, coord.x * sizeof(ushort4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort, 1, t, t16, i, int x, x * sizeof(ushort)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x * sizeof(ushort), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv3_i, int3 coord, coord.x * sizeof(ushort), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x * sizeof(ushort2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x * sizeof(ushort2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv3_i, int3 coord, coord.x * sizeof(ushort2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x * sizeof(ushort4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x * sizeof(ushort4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv3_i, int3 coord, coord.x * sizeof(ushort4), coord.y, coord.z) // Char -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char, 1, a, i8, i, int x, x * sizeof(char)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x * sizeof(char), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char, 3, a, i8, Dv3_i, int3 coord, coord.x * sizeof(char), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x * sizeof(char2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x * sizeof(char2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char2, 3, Dv2_a, v2i8, Dv3_i, int3 coord, coord.x * sizeof(char2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x * sizeof(char4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x * sizeof(char4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char4, 3, Dv4_a, v4i8, Dv3_i, int3 coord, coord.x * sizeof(char4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char, 1, a, i8, i, int x, x * sizeof(char)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x * sizeof(char), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv3_i, int3 coord, coord.x * sizeof(char), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x * sizeof(char2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x * sizeof(char2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv3_i, int3 coord, coord.x * sizeof(char2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x * sizeof(char4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x * sizeof(char4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv3_i, int3 coord, coord.x * sizeof(char4), coord.y, coord.z) // Unsigned Char -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar, 1, h, h8, i, int x, x * sizeof(uchar)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x * sizeof(uchar), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar, 3, h, h8, Dv3_i, int3 coord, coord.x * sizeof(uchar), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x * sizeof(uchar2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x * sizeof(uchar2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv3_i, int3 coord, coord.x * sizeof(uchar2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x * sizeof(uchar4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x * sizeof(uchar4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv3_i, int3 coord, coord.x * sizeof(uchar4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar, 1, h, h8, i, int x, x * sizeof(uchar)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x * sizeof(uchar), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv3_i, int3 coord, coord.x * sizeof(uchar), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x * sizeof(uchar2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x * sizeof(uchar2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv3_i, int3 coord, coord.x * sizeof(uchar2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x * sizeof(uchar4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x * sizeof(uchar4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv3_i, int3 coord, coord.x * sizeof(uchar4), coord.y, coord.z) // Float -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float, 1, f, f32, i, int x, x * sizeof(float)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float, 2, f, f32, Dv2_i, int2 coord, coord.x * sizeof(float), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float, 3, f, f32, Dv3_i, int3 coord, coord.x * sizeof(float), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float2, 1, Dv2_f, v2f32, i, int x, x * sizeof(float2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float2, 2, Dv2_f, v2f32, Dv2_i, int2 coord, coord.x * sizeof(float2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float2, 3, Dv2_f, v2f32, Dv3_i, int3 coord, coord.x * sizeof(float2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float4, 1, Dv4_f, v4f32, i, int x, x * sizeof(float4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, int2 coord, coord.x * sizeof(float4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float4, 3, Dv4_f, v4f32, Dv3_i, int3 coord, coord.x * sizeof(float4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float, 1, f, f32, i, int x, x * sizeof(float)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float, 2, f, f32, Dv2_i, int2 coord, coord.x * sizeof(float), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv3_i, int3 coord, coord.x * sizeof(float), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float2, 1, Dv2_f, v2f32, i, int x, x * sizeof(float2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, Dv2_i, int2 coord, coord.x * sizeof(float2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv3_i, int3 coord, coord.x * sizeof(float2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float4, 1, Dv4_f, v4f32, i, int x, x * sizeof(float4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, int2 coord, coord.x * sizeof(float4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, Dv3_i, int3 coord, coord.x * sizeof(float4), coord.y, coord.z) // Half -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half, 1, DF16_, f16, i, int x, x * sizeof(half)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x * sizeof(half), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half, 3, DF16_, f16, Dv3_i, int3 coord, coord.x * sizeof(half), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x * sizeof(half2)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x * sizeof(half2), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv3_i, int3 coord, coord.x * sizeof(half2), coord.y, coord.z) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x * sizeof(half4)) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x * sizeof(half4), coord.y) -_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv3_i, int3 coord, coord.x * sizeof(half4), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half, 1, DF16_, f16, i, int x, x * sizeof(half)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x * sizeof(half), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv3_i, int3 coord, coord.x * sizeof(half), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x * sizeof(half2)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x * sizeof(half2), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv3_i, int3 coord, coord.x * sizeof(half2), coord.y, coord.z) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x * sizeof(half4)) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x * sizeof(half4), coord.y) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv3_i, int3 coord, coord.x * sizeof(half4), coord.y, coord.z) // WRITES // Int @@ -2397,100 +2397,100 @@ _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, f16, v4f3 elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input, \ ...) \ _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE( \ - 18, __spirv_ImageFetch, I##elem_t_mangled, \ + 25, __spirv_SampledImageFetch, I##elem_t_mangled, \ coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_input) { \ return __nvvm_tex_##dimension##d_##vec_size##_i32(imageHandle, \ __VA_ARGS__); \ } + // Int _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 1, i, i32, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, S0_, int2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv3_i, int4 coord, coord.x, coord.y, coord.z) // Unsigned int _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 1, j, j32, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 2, j, j32, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 3, j, j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 3, j, j32, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv3_i, int4 coord, coord.x, coord.y, coord.z) // Short _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 1, s, i16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) // Unsigned short _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 1, t, t16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) // Char _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 1, a, i8, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv3_i, int4 coord, coord.x, coord.y, coord.z) // Unsigned Char _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 1, h, h8, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv3_i, int4 coord, coord.x, coord.y, coord.z) // Float _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 1, f, f32, i, uint x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 2, f, f32, Dv2_i, uint2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv3_i, uint4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 1, Dv2_f, v2f32, i, uint x, x) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, S0_, uint2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, Dv2_i, uint2 coord, coord.x, coord.y) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv3_i, uint4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 1, Dv4_f, v4f32, i, uint x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, uint2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, S0_, uint4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, Dv3_i, uint4 coord, coord.x, coord.y, coord.z) // Half _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 1, DF16_, f16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x, coord.y) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z) - +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv3_i, int4 coord, coord.x, coord.y, coord.z) // <--- MIPMAP ---> diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index 8a0e6866351f9..a1bc9e407db8f 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -203,6 +203,9 @@ extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT); template extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT); +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageFetch(ImageT, TempArgT); + template extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT, __ocl_sampler_t); diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 2a10a722c565a..8b825b5432e89 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -87,20 +87,21 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageFetch(Img, TmpCoords)); + __spirv_ImageFetch(Img, + TmpCoords)); } -// template -// static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { +template +static RetType __invoke__SampledImageFetch(ImageT Img, CoordT Coords) { -// // Convert from sycl types to builtin types to get correct function mangling. -// using TempRetT = sycl::detail::ConvertToOpenCLType_t; -// using TempArgT = sycl::detail::ConvertToOpenCLType_t; + // Convert from sycl types to builtin types to get correct function mangling. + using TempRetT = sycl::detail::ConvertToOpenCLType_t; + auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); -// TempArgT Arg = sycl::detail::convertDataToType(Coords); -// TempRetT Ret = __spirv_ImageFetch(Img, Arg); -// return sycl::detail::convertDataToType(Ret); -// } + return sycl::detail::convertFromOpenCLTypeFor( + __spirv_SampledImageFetch( + Img, TmpCoords)); +} template static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index b859e8a38c89e..01126b12d8ac5 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -808,7 +808,8 @@ template constexpr bool is_recognized_standard_type() { * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) + * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another */ @@ -823,7 +824,7 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]], #ifdef __SYCL_DEVICE_ONLY__ if constexpr (detail::is_recognized_standard_type()) { - return __invoke__ImageRead(imageHandle.raw_handle, coords); + return __invoke__ImageFetch(imageHandle.raw_handle, coords); } else { static_assert(sizeof(HintT) == sizeof(DataT), "When trying to read a user-defined type, HintT must be of " @@ -831,7 +832,7 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]], static_assert(detail::is_recognized_standard_type(), "HintT must always be a recognized standard type"); return sycl::bit_cast( - __invoke__ImageRead(imageHandle.raw_handle, coords)); + __invoke__ImageFetch(imageHandle.raw_handle, coords)); } #else assert(false); // Bindless images not yet implemented on host @@ -854,7 +855,8 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]], * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) + * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another */ @@ -903,10 +905,10 @@ DataT fetch_image(const sampled_image_handle &imageHandle [[maybe_unused]], #ifdef __SYCL_DEVICE_ONLY__ if constexpr (detail::is_recognized_standard_type()) { - return __invoke__ImageRead(imageHandle.raw_handle, coords); + return __invoke__SampledImageFetch(imageHandle.raw_handle, coords); } else { return sycl::bit_cast( - __invoke__ImageRead(imageHandle.raw_handle, coords)); + __invoke__SampledImageFetch(imageHandle.raw_handle, coords)); } #else assert(false); // Bindless images not yet implemented on host. @@ -976,7 +978,8 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]], * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) + * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another */ From 74324c63e9ecf0c9c6d008788395b9bf6d6a4d01 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 22 Feb 2024 15:22:23 +0000 Subject: [PATCH 11/34] Update UR repo TAG --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 4c88b3a971704..3c30af782b54d 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 747dad97f8da54c9c2c0bd255c20d6329a7e996f + # commit b386db5da1786da14dbcd5def02a381ebb098f15 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 747dad97f8da54c9c2c0bd255c20d6329a7e996f) + set(UNIFIED_RUNTIME_TAG b386db5da1786da14dbcd5def02a381ebb098f15) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 0cf37c2d28fdfef6afec25a07d4c941cc189cb80 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 22 Feb 2024 15:40:56 +0000 Subject: [PATCH 12/34] Add missing `fetch_image` API in specification document --- .../experimental/sycl_ext_oneapi_bindless_images.asciidoc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 3aa647222ffe9..ebf554d9ab446 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -988,6 +988,10 @@ namespace sycl::ext::oneapi::experimental { template DataT fetch_image(const unsampled_image_handle &ImageHandle, const CoordT &Coords); + +template +DataT fetch_image(const sampled_image_handle &ImageHandle, + const CoordT &Coords); template DataT sample_image(const sampled_image_handle &ImageHandle, const CoordT &Coords); @@ -1009,7 +1013,8 @@ When fetching from a sampled image handle, data exatly as is in memory, no sampling operations will be performed, and the `bindless_image_sampler` passed to the image upon creation has no effect on the returned image data. Note that not all devices may support fetching of sampled image data depending on the -dimension or backing memory type. We provide device aspect queries for this in <>. +dimension or backing memory type. We provide device aspect queries for this in +<>. The user is required to pass a `DataT` template parameter, which specifies the return type of the `fetch_image` and `sample_image` functions. If `DataT` is From 72cb5a3c4c9528cb59376e64f51ae110b79b2ef6 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 22 Feb 2024 15:51:51 +0000 Subject: [PATCH 13/34] Fix PI versions in pi.h --- sycl/include/sycl/detail/pi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index c74adff94baa2..4380ae5c7468c 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -152,15 +152,15 @@ // 15.43 Changed the signature of piextMemGetNativeHandle to also take a // pi_device // 15.44 Add coarse-grain memory advice flag for HIP. -// 15.45 Added device queries for sampled image fetch support +// 15.45 Added piextKernelSuggestMaxCooperativeGroupCount and +// piextEnqueueCooperativeKernelLaunch. +// 15.46 Added device queries for sampled image fetch support // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D -// 15.46 Added piextKernelSuggestMaxCooperativeGroupCount and -// piextEnqueueCooperativeKernelLaunch. #define _PI_H_VERSION_MAJOR 15 #define _PI_H_VERSION_MINOR 46 From e7cdd4340ec29ba96f4828808361c9dddcae1c77 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 22 Feb 2024 16:20:24 +0000 Subject: [PATCH 14/34] Fix non-e2e bindless images test --- sycl/test/extensions/bindless_images.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/extensions/bindless_images.cpp b/sycl/test/extensions/bindless_images.cpp index 0f7f72e60771b..eece72c070930 100644 --- a/sycl/test/extensions/bindless_images.cpp +++ b/sycl/test/extensions/bindless_images.cpp @@ -4,7 +4,7 @@ #include // CHECK: spir_kernel void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperI10image_readEE -// CHECK: tail call spir_func noundef <4 x float> @_Z17__spirv_ImageReadIDv4 +// CHECK: tail call spir_func noundef <4 x float> @_Z18__spirv_ImageFetchIDv4 using namespace sycl::ext::oneapi::experimental; class image_read; int main() { From 2cb042c71d68a833271e5f30086924639dc5cf43 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Mon, 4 Mar 2024 14:39:34 +0000 Subject: [PATCH 15/34] Revert existing aspect macros to original values --- sycl/include/sycl/device_aspect_macros.hpp | 158 ++++++++++----------- sycl/include/sycl/info/aspects.def | 28 ++-- 2 files changed, 93 insertions(+), 93 deletions(-) diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index b62f498b23ad3..0bd137e228a01 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -273,79 +273,79 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_mipmap_level_reference__ 0 #endif -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ \ - 0 -#endif - -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ 0 -#endif - -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ \ - 0 -#endif - -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ 0 -#endif - -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ \ - 0 -#endif - -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0 -#endif - #ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__ -//__SYCL_ASPECT(ext_intel_esimd, 59) +//__SYCL_ASPECT(ext_intel_esimd, 53) #define __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__ -// __SYCL_ASPECT(ext_oneapi_ballot_group, 60) +// __SYCL_ASPECT(ext_oneapi_ballot_group, 54) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__ -// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61) +// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__ -// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62) +// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__ -// __SYCL_ASPECT(ext_oneapi_tangle_group, 63) +// __SYCL_ASPECT(ext_oneapi_tangle_group, 57) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_matrix__ -// __SYCL_ASPECT(ext_intel_matrix, 64) +// __SYCL_ASPECT(ext_intel_matrix, 58) #define __SYCL_ALL_DEVICES_HAVE_ext_intel_matrix__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_composite__ -// __SYCL_ASPECT(ext_oneapi_is_composite, 65) +// __SYCL_ASPECT(ext_oneapi_is_composite, 59) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_composite__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_component__ -// __SYCL_ASPECT(ext_oneapi_is_component, 66) +// __SYCL_ASPECT(ext_oneapi_is_component, 60) #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_component__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 61) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ \ + 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 62) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 63) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ \ + 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 64) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 65) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ \ + 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 66) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -611,72 +611,72 @@ #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_mipmap_level_reference__ 0 #endif -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ 0 -#endif - -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ 0 -#endif - -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ 0 -#endif - -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ 0 -#endif - -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ 0 -#endif - -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ -//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0 -#endif - #ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__ -//__SYCL_ASPECT(ext_intel_esimd, 59) +//__SYCL_ASPECT(ext_intel_esimd, 53) #define __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__ -// __SYCL_ASPECT(ext_oneapi_ballot_group, 60) +// __SYCL_ASPECT(ext_oneapi_ballot_group, 54) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__ -// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61) +// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__ -// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62) +// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__ -// __SYCL_ASPECT(ext_oneapi_tangle_group, 63) +// __SYCL_ASPECT(ext_oneapi_tangle_group, 57) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_matrix__ -// __SYCL_ASPECT(ext_intel_matrix, 64) +// __SYCL_ASPECT(ext_intel_matrix, 58) #define __SYCL_ANY_DEVICE_HAS_ext_intel_matrix__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_composite__ -// __SYCL_ASPECT(ext_oneapi_is_composite, 65) +// __SYCL_ASPECT(ext_oneapi_is_composite, 59) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_composite__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_component__ -// __SYCL_ASPECT(ext_oneapi_is_component, 66) +// __SYCL_ASPECT(ext_oneapi_is_component, 60) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_component__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 61) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 62) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 63) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 64) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 65) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ +//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 66) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0 +#endif diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 739698d371fbf..c1df9c53abd5a 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -47,17 +47,17 @@ __SYCL_ASPECT(ext_oneapi_interop_semaphore_export, 49) __SYCL_ASPECT(ext_oneapi_mipmap, 50) __SYCL_ASPECT(ext_oneapi_mipmap_anisotropy, 51) __SYCL_ASPECT(ext_oneapi_mipmap_level_reference, 52) -__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53) -__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54) -__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55) -__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56) -__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57) -__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58) -__SYCL_ASPECT(ext_intel_esimd, 59) -__SYCL_ASPECT(ext_oneapi_ballot_group, 60) -__SYCL_ASPECT(ext_oneapi_fixed_size_group, 61) -__SYCL_ASPECT(ext_oneapi_opportunistic_group, 62) -__SYCL_ASPECT(ext_oneapi_tangle_group, 63) -__SYCL_ASPECT(ext_intel_matrix, 64) -__SYCL_ASPECT(ext_oneapi_is_composite, 65) -__SYCL_ASPECT(ext_oneapi_is_component, 66) +__SYCL_ASPECT(ext_intel_esimd, 53) +__SYCL_ASPECT(ext_oneapi_ballot_group, 54) +__SYCL_ASPECT(ext_oneapi_fixed_size_group, 55) +__SYCL_ASPECT(ext_oneapi_opportunistic_group, 56) +__SYCL_ASPECT(ext_oneapi_tangle_group, 57) +__SYCL_ASPECT(ext_intel_matrix, 58) +__SYCL_ASPECT(ext_oneapi_is_composite, 59) +__SYCL_ASPECT(ext_oneapi_is_component, 60) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 61) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 62) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 63) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 64) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 65) +__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 66) From 6b11a394406b2bbf43645bf2f9b2047657da9ead Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 5 Mar 2024 18:53:41 +0000 Subject: [PATCH 16/34] Use `// REQUIRES: aspect-<...>` instead of querying the device --- .../bindless_images/sampled_fetch/fetch_1D_USM.cpp | 10 +--------- .../bindless_images/sampled_fetch/fetch_2D.cpp | 10 +--------- .../bindless_images/sampled_fetch/fetch_2D_USM.cpp | 10 +--------- .../bindless_images/sampled_fetch/fetch_3D.cpp | 10 +--------- 4 files changed, 4 insertions(+), 36 deletions(-) diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp index 557cbc7e9f243..0644879136c29 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp @@ -1,5 +1,6 @@ // REQUIRES: linux // REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_1d_usm // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -17,15 +18,6 @@ int main() { sycl::queue q(dev); auto ctxt = q.get_context(); - // Check if device supports 1D USM sampled image fetches - if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm)) { -#ifdef VERBOSE_PRINT - std::cout << "Test skipped due to lack of device support for fetching 1D " - "USM backed sampled images\n"; -#endif - return 0; - } - // declare image data constexpr size_t width = 16; std::vector out(width); diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp index 3af92e0c00935..0199bf9428194 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp @@ -1,5 +1,6 @@ // REQUIRES: linux // REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -18,15 +19,6 @@ int main() { sycl::queue q(dev); auto ctxt = q.get_context(); - // Check if device supports 2D non-USM sampled image fetches - if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d)) { -#ifdef VERBOSE_PRINT - std::cout << "Test skipped due to lack of device support for fetching 2D " - "non-USM backed sampled images\n"; -#endif - return 0; - } - // declare image data constexpr size_t width = 5; constexpr size_t height = 6; diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp index 12608e3908b0a..3e5c9991d0ac5 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp @@ -1,5 +1,6 @@ // REQUIRES: linux // REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d_usm // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -18,15 +19,6 @@ int main() { sycl::queue q(dev); auto ctxt = q.get_context(); - // Check if device supports 2D USM sampled image fetches - if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm)) { -#ifdef VERBOSE_PRINT - std::cout << "Test skipped due to lack of device support for fetching 2D " - "USM backed sampled images\n"; -#endif - return 0; - } - // declare image data constexpr size_t width = 5; constexpr size_t height = 6; diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp index 12227c05407e4..2a08d36641b61 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -1,5 +1,6 @@ // REQUIRES: linux // REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_3d // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -18,15 +19,6 @@ int main() { sycl::queue q(dev); auto ctxt = q.get_context(); - // Check if device supports 3D non-USM sampled image fetches - if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d)) { -#ifdef VERBOSE_PRINT - std::cout << "Test skipped due to lack of device support for fetching 3D " - "non-USM backed sampled images\n"; -#endif - return 0; - } - // declare image data constexpr size_t width = 4; constexpr size_t height = 6; From cd917da895405cc9d8c6e9a47433db72aa475e05 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Fri, 8 Mar 2024 14:24:19 +0000 Subject: [PATCH 17/34] Add verbose-print parameter to LIT config No source code changes are now required to print verbose test output --- sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp | 3 --- sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp | 3 --- sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp | 3 --- sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp | 3 --- sycl/test-e2e/format.py | 2 +- sycl/test-e2e/lit.cfg.py | 5 +++++ 6 files changed, 6 insertions(+), 13 deletions(-) diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp index 0644879136c29..75824bef09928 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp @@ -8,9 +8,6 @@ #include #include -// Uncomment to print additional test information -// #define VERBOSE_PRINT - class kernel_sampled_fetch; int main() { diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp index 0199bf9428194..6f6833748ec40 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp @@ -8,9 +8,6 @@ #include #include -// Uncomment to print additional test information -// #define VERBOSE_PRINT - class kernel_sampled_fetch; int main() { diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp index 3e5c9991d0ac5..6b91eea0f4482 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp @@ -8,9 +8,6 @@ #include #include -// Uncomment to print additional test information -// #define VERBOSE_PRINT - class kernel_sampled_fetch; int main() { diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp index 2a08d36641b61..f02cb389e7a97 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -8,9 +8,6 @@ #include #include -// Uncomment to print additional test information -// #define VERBOSE_PRINT - class kernel_sampled_fetch; int main() { diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index af44f2fece5af..b979c397cccfe 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -171,7 +171,7 @@ def execute(self, test, litConfig): # -that new tests by default would runnable there (unless they have # -other restrictions). substitutions.append( - ("%{build}", "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %s") + ("%{build}", "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %verbose_print %s") ) if platform.system() == "Windows": substitutions.append( diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 869dce39a5a13..b46e674a4ffa3 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -376,6 +376,11 @@ ) config.substitutions.append(("%shared_lib", "-shared")) +# Check if user passed verbose-print parameter, if yes, add VERBOSE_PRINT macro +if 'verbose-print' in lit_config.params: + config.substitutions.append(("%verbose_print", "-DVERBOSE_PRINT")) +else: + config.substitutions.append(("%verbose_print", "")) config.substitutions.append(("%vulkan_include_dir", config.vulkan_include_dir)) config.substitutions.append(("%vulkan_lib", config.vulkan_lib)) From 8d792d27b6febc40f9e82e5c6b8ea827536df200 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 13 Mar 2024 10:09:33 +0000 Subject: [PATCH 18/34] Update UR TAG --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 94f16519b321d..66eb57c8afd57 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 803036ecc42978d4ed77401a39991c0d1d1239a8 + # commit 30e4fd301f6c794f4a3216a79e14ccef04822d85 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 803036ecc42978d4ed77401a39991c0d1d1239a8) + set(UNIFIED_RUNTIME_TAG 30e4fd301f6c794f4a3216a79e14ccef04822d85) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 1657829382666ed5c064602069f13e0efa0d3738 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 4 Apr 2024 13:58:50 +0100 Subject: [PATCH 19/34] Formatting --- sycl/include/sycl/detail/pi.h | 2 +- sycl/include/sycl/ext/oneapi/bindless_images.hpp | 10 +++++----- sycl/test-e2e/format.py | 5 ++++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 4601ac636e803..948b061b76f32 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -458,7 +458,7 @@ typedef enum { PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_SUPPORT = 0x20113, PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT = 0x20114, - // Bindless images sampled image fetch + // Bindless images sampled image fetch PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM = 0x20115, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D = 0x20116, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM = 0x20117, diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 06792136c5cbc..6dd5a98f3a989 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -806,7 +806,7 @@ template constexpr bool is_recognized_standard_type() { * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another @@ -853,7 +853,7 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]], * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another @@ -882,7 +882,7 @@ DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]], * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another @@ -929,7 +929,7 @@ DataT fetch_image(const sampled_image_handle &imageHandle [[maybe_unused]], * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another @@ -976,7 +976,7 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]], * * __NVPTX__: Name mangling info * Cuda surfaces require integer coords (by bytes) - * Cuda textures require float coords (by element or normalized) + * Cuda textures require float coords (by element or normalized) * for sampling, and integer coords (by bytes) for fetching * The name mangling should therefore not interfere with one * another diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index b979c397cccfe..44a3566a16c54 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -171,7 +171,10 @@ def execute(self, test, litConfig): # -that new tests by default would runnable there (unless they have # -other restrictions). substitutions.append( - ("%{build}", "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %verbose_print %s") + ( + "%{build}", + "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %verbose_print %s", + ) ) if platform.system() == "Windows": substitutions.append( From f04cc34d7984402b2b958dd1797682e614fec46e Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 4 Apr 2024 14:28:48 +0100 Subject: [PATCH 20/34] Format. Use 'call' for device queries. --- sycl/source/detail/device_impl.cpp | 57 +++++++++++++----------------- sycl/test-e2e/lit.cfg.py | 2 +- 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index e6b5bf866a931..a1d318889e3c3 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -584,54 +584,45 @@ bool device_impl::has(aspect Aspect) const { } case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: { pi_bool support = PI_FALSE; - bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; - return call_successful && support; + getPlugin()->call( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, + sizeof(pi_bool), &support, nullptr); + return support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: { pi_bool support = PI_FALSE; - bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; - return call_successful && support; + getPlugin()->call( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, + sizeof(pi_bool), &support, nullptr); + return support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: { pi_bool support = PI_FALSE; - bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; - return call_successful && support; + getPlugin()->call( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, + sizeof(pi_bool), &support, nullptr); + return support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: { pi_bool support = PI_FALSE; - bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; - return call_successful && support; + getPlugin()->call( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, + sizeof(pi_bool), &support, nullptr); + return support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm: { pi_bool support = PI_FALSE; - bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; - return call_successful && support; + getPlugin()->call( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM, + sizeof(pi_bool), &support, nullptr); + return support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: { pi_bool support = PI_FALSE; - bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; - return call_successful && support; + getPlugin()->call( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, + sizeof(pi_bool), &support, nullptr); + return support; } case aspect::ext_intel_esimd: { pi_bool support = PI_FALSE; diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index ded4628d61d44..b734e0fcd0acf 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -378,7 +378,7 @@ config.substitutions.append(("%shared_lib", "-shared")) # Check if user passed verbose-print parameter, if yes, add VERBOSE_PRINT macro -if 'verbose-print' in lit_config.params: +if "verbose-print" in lit_config.params: config.substitutions.append(("%verbose_print", "-DVERBOSE_PRINT")) else: config.substitutions.append(("%verbose_print", "")) From 17fa6db82dd19422049129e1e5bd6aa68d86a78c Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 9 Apr 2024 11:04:01 +0100 Subject: [PATCH 21/34] Update UR repo/tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 46 ++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 610e46e80c203..44f066aa1f03e 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -91,14 +91,29 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59 - # Merge: b9153547 ec773e6c - # Author: aarongreig - # Date: Fri Apr 5 14:26:59 2024 +0100 - # Merge pull request #1486 from nrspruit/fix_memfree_report - # [L0] Fix DeviceInfo global mem free to report unsupported given MemCount==0 - set(UNIFIED_RUNTIME_TAG 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59) + # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + # # commit 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59 + # # Merge: b9153547 ec773e6c + # # Author: aarongreig + # # Date: Fri Apr 5 14:26:59 2024 +0100 + # # Merge pull request #1486 from nrspruit/fix_memfree_report + # # [L0] Fix DeviceInfo global mem free to report unsupported given MemCount==0 + # set(UNIFIED_RUNTIME_TAG 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59) + + set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") + # commit 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c + # Author: Przemek Malon + # Date: Wed Nov 29 11:25:34 2023 +0000 + # [Bindless][Exp] Add device queries for sampled image fetch + # Added the following queries for device capabilities of fetching sampled + # images: + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP + # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP + set(UNIFIED_RUNTIME_TAG 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} @@ -130,21 +145,6 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) ${UNIFIED_RUNTIME_TAG} ) - set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c - # Author: Przemek Malon - # Date: Wed Nov 29 11:25:34 2023 +0000 - # [Bindless][Exp] Add device queries for sampled image fetch - # Added the following queries for device capabilities of fetching sampled - # images: - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c) - if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") endif() From 54d18128f2a15582b382f43d40403a71a8903219 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 9 Apr 2024 11:48:55 +0100 Subject: [PATCH 22/34] Undo removal of comment --- sycl/plugins/unified_runtime/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 44f066aa1f03e..3ff10a7dd58b1 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,6 +56,9 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) + # The fetch_adapter_source function can be used to perform a separate content + # fetch for a UR adapter, this allows development of adapters to be decoupled + # from each other. # # A separate content fetch will not be performed if: # * The adapter name is not present in the SYCL_ENABLE_PLUGINS variable. From ff907c2a9712e60dafe349a251acfaed5186ffdb Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 9 Apr 2024 11:54:19 +0100 Subject: [PATCH 23/34] Revert aspect queries to call_nocheck --- sycl/source/detail/device_impl.cpp | 57 +++++++++++++++++------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index a1d318889e3c3..e6b5bf866a931 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -584,45 +584,54 @@ bool device_impl::has(aspect Aspect) const { } case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: { pi_bool support = PI_FALSE; - getPlugin()->call( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, - sizeof(pi_bool), &support, nullptr); - return support; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: { pi_bool support = PI_FALSE; - getPlugin()->call( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, - sizeof(pi_bool), &support, nullptr); - return support; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: { pi_bool support = PI_FALSE; - getPlugin()->call( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, - sizeof(pi_bool), &support, nullptr); - return support; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: { pi_bool support = PI_FALSE; - getPlugin()->call( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, - sizeof(pi_bool), &support, nullptr); - return support; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm: { pi_bool support = PI_FALSE; - getPlugin()->call( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM, - sizeof(pi_bool), &support, nullptr); - return support; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: { pi_bool support = PI_FALSE; - getPlugin()->call( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, - sizeof(pi_bool), &support, nullptr); - return support; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; } case aspect::ext_intel_esimd: { pi_bool support = PI_FALSE; From d80630b7d5f99e8f9962a60c11608cd8658f49ff Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 9 Apr 2024 19:04:19 +0100 Subject: [PATCH 24/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 8159bdb656bfd..0a059746d4c3a 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c + # commit d05b206d62329b67a36708127afb052c4f3f273e # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c) + set(UNIFIED_RUNTIME_TAG d05b206d62329b67a36708127afb052c4f3f273e) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0 From 773b49cf36cb7ab5ae882e7d1e39d54a0003f96b Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 10:38:25 +0100 Subject: [PATCH 25/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0a059746d4c3a..7aa8bc5b04bfc 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit d05b206d62329b67a36708127afb052c4f3f273e + # commit 4202d207359b14d08e821d28d36608a5605f1ace # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG d05b206d62329b67a36708127afb052c4f3f273e) + set(UNIFIED_RUNTIME_TAG 4202d207359b14d08e821d28d36608a5605f1ace) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0 From 0fc3629e5334687685b413c746510b38f84f3736 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 11:17:14 +0100 Subject: [PATCH 26/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 7aa8bc5b04bfc..423a5a03b3280 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 4202d207359b14d08e821d28d36608a5605f1ace + # commit 916e458b7f45c147061ef3049c78c649a12a2d23 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 4202d207359b14d08e821d28d36608a5605f1ace) + set(UNIFIED_RUNTIME_TAG 916e458b7f45c147061ef3049c78c649a12a2d23) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0 From a9cb08a51c13c255216cca86f6dd49e1a0eaf0db Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 11:23:39 +0100 Subject: [PATCH 27/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 423a5a03b3280..b9af8dfa6dd38 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 916e458b7f45c147061ef3049c78c649a12a2d23 + # commit 8fe880bf49d1e428421c3913e6f23cbe367755c0 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 916e458b7f45c147061ef3049c78c649a12a2d23) + set(UNIFIED_RUNTIME_TAG 8fe880bf49d1e428421c3913e6f23cbe367755c0) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0 From 711109f7bffa359d7bcd2a12ebdb0e1c9e129e69 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 12:03:02 +0100 Subject: [PATCH 28/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index b9af8dfa6dd38..13b70402c7109 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 8fe880bf49d1e428421c3913e6f23cbe367755c0 + # commit 60b8afe176a8ab6efd02479120ac5d9b3f460b0b # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 8fe880bf49d1e428421c3913e6f23cbe367755c0) + set(UNIFIED_RUNTIME_TAG 60b8afe176a8ab6efd02479120ac5d9b3f460b0b) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0 From e59df310e88554d556d346aa6dd5970f468c5789 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 14:37:03 +0100 Subject: [PATCH 29/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 13b70402c7109..973a7755e5fe7 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 60b8afe176a8ab6efd02479120ac5d9b3f460b0b + # commit 69e70cb716a162b60c17ff677850c5e43cb3523e # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 60b8afe176a8ab6efd02479120ac5d9b3f460b0b) + set(UNIFIED_RUNTIME_TAG 69e70cb716a162b60c17ff677850c5e43cb3523e) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0 From 94a43048465e6fb61a1abab900c7a0570bd7ae4f Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 16:48:50 +0100 Subject: [PATCH 30/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 69980c6d661e9..f3f2cfd2e1a9d 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 69e70cb716a162b60c17ff677850c5e43cb3523e + # commit 0ead0ca13cb59c3273ab3add872f55d52c900c97 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 69e70cb716a162b60c17ff677850c5e43cb3523e) + set(UNIFIED_RUNTIME_TAG 0ead0ca13cb59c3273ab3add872f55d52c900c97) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0 From d559f35ed863203441f3d78ad093cb508d59c9c9 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 17:56:06 +0100 Subject: [PATCH 31/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 8559852b07389..2c3f396b87315 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 0ead0ca13cb59c3273ab3add872f55d52c900c97 + # commit 323fd84406375735f8bb0de091accf63c1ba8c1e # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 0ead0ca13cb59c3273ab3add872f55d52c900c97) + set(UNIFIED_RUNTIME_TAG 323fd84406375735f8bb0de091accf63c1ba8c1e) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit e00a764f8dafd1319b636aa99c15601ec0d4d7fc From 1f45ea1b915a9a639782457e0a0a04cfe475f9f5 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 10 Apr 2024 18:34:16 +0100 Subject: [PATCH 32/34] Update UR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 8389686a25193..e8ea88da6f9d1 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 323fd84406375735f8bb0de091accf63c1ba8c1e + # commit 21f023f386c3352d11ceb16959f8b54a2be34757 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 323fd84406375735f8bb0de091accf63c1ba8c1e) + set(UNIFIED_RUNTIME_TAG 21f023f386c3352d11ceb16959f8b54a2be34757) # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # # commit e00a764f8dafd1319b636aa99c15601ec0d4d7fc From cb9cd5f77c8367fdf19775e2c7866eb140aa757e Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Tue, 30 Apr 2024 13:27:50 +0100 Subject: [PATCH 33/34] Update UR Tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0be29ca0f614b..35a19806c5361 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -104,7 +104,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # set(UNIFIED_RUNTIME_TAG 717791bfc636a1cd69f95f09fea15fcd70a9fb23) set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 8f62b5add090372214a0f872fa6630d3bdaff14c + # commit 56142ad69cdc34c4e090a852fd3c599446143ba2 # Author: Przemek Malon # Date: Wed Nov 29 11:25:34 2023 +0000 # [Bindless][Exp] Add device queries for sampled image fetch @@ -118,7 +118,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 8f62b5add090372214a0f872fa6630d3bdaff14c) + set(UNIFIED_RUNTIME_TAG 56142ad69cdc34c4e090a852fd3c599446143ba2) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} From 0e55b21b2dd954cd0c371fb410f120ac5c9a8555 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 2 May 2024 11:23:11 +0100 Subject: [PATCH 34/34] Update UR Tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 32 ++++++--------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 35a19806c5361..19ae36bdc46a4 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -94,31 +94,15 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # # commit 717791bfc636a1cd69f95f09fea15fcd70a9fb23 - # # Merge: 601062ba f8c4facd - # # Author: aarongreig - # # Date: Fri Apr 19 10:35:14 2024 +0100 - # # Merge pull request #1517 from nrspruit/fix_l0_coverity_sync - # # [L0] Store LastCommandEvent before unlock during queue sync - # set(UNIFIED_RUNTIME_TAG 717791bfc636a1cd69f95f09fea15fcd70a9fb23) - - set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") - # commit 56142ad69cdc34c4e090a852fd3c599446143ba2 - # Author: Przemek Malon - # Date: Wed Nov 29 11:25:34 2023 +0000 + set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + # commit ebf873fb5996c9ddca32bbb7c9330d3ffe15473c + # Merge: 633ec408 8f375039 + # Author: Kenneth Benzie (Benie) + # Date: Thu May 2 10:56:55 2024 +0100 + # Merge pull request #1535 from przemektmalon/przemek/sampled-image-fetch + # # [Bindless][Exp] Add device queries for sampled image fetch - # - # Added the following queries for device capabilities of fetching - # sampled images: - # - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP - # - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP - set(UNIFIED_RUNTIME_TAG 56142ad69cdc34c4e090a852fd3c599446143ba2) + set(UNIFIED_RUNTIME_TAG ebf873fb5996c9ddca32bbb7c9330d3ffe15473c) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO}