From 3ea1db3876413a680e6f8f62d2710407bb2480d4 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 21 Nov 2023 12:13:32 +0000
Subject: [PATCH 01/34] [Bindless][Exp] Add texture fetch functionality

With this commit, it is now possible to "fetch" sampled image data by
passing integer coordinates to the `read_image` funtion.
---
 libclc/ptx-nvidiacl/libspirv/images/image.cl  | 245 ++++++++++++++++++
 .../libspirv/images/image_helpers.ll          |  74 +++++-
 .../llvm/SYCLLowerIR/DeviceConfigFile.td      |  12 +-
 .../sycl_ext_oneapi_bindless_images.asciidoc  |  47 +++-
 sycl/include/CL/__spirv/spirv_ops.hpp         |   3 +
 sycl/include/sycl/detail/image_ocl_types.hpp  |  12 +
 sycl/include/sycl/detail/pi.h                 |  16 +-
 sycl/include/sycl/device_aspect_macros.hpp    |  83 +++++-
 .../sycl/ext/oneapi/bindless_images.hpp       |  54 +++-
 sycl/include/sycl/info/aspects.def            |  16 +-
 sycl/plugins/unified_runtime/CMakeLists.txt   |  22 +-
 sycl/plugins/unified_runtime/pi2ur.hpp        |  18 ++
 sycl/source/detail/device_impl.cpp            |  51 ++++
 .../bindless_images/image_get_info.cpp        |  27 +-
 .../sampled_fetch/fetch_1D_USM.cpp            | 104 ++++++++
 .../sampled_fetch/fetch_2D.cpp                | 125 +++++++++
 .../sampled_fetch/fetch_2D_USM.cpp            | 132 ++++++++++
 .../sampled_fetch/fetch_3D.cpp                | 129 +++++++++
 sycl/test-e2e/bindless_images/sampling_1D.cpp |   2 +-
 19 files changed, 1130 insertions(+), 42 deletions(-)
 create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
 create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
 create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
 create mode 100644 sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp

diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl
index f55f0c435cf35..67553b03c2f84 100644
--- a/libclc/ptx-nvidiacl/libspirv/images/image.cl
+++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl
@@ -1837,6 +1837,8 @@ _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(half4, 3, Dv4_i, Dv4_DF16_, v4f16, int4
 
 // <--- TEXTURES --->
 
+// <--- Texture sampling (float coords) --->
+
 // Int
 int4 __nvvm_tex_1d_v4i32_f32(unsigned long,
                              float) __asm("__clc_llvm_nvvm_tex_1d_v4i32_f32");
@@ -2270,6 +2272,249 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(half4, 1, Dv4_DF16_, v4f16, f, fl
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_f, float2 coord, coord.x, coord.y)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_f, float4 coord, coord.x, coord.y, coord.z)
 
+// <--- Texture fetching (integer coords) --->
+
+// Int
+int4 __nvvm_tex_1d_v4i32_i32(unsigned long,
+                             int) __asm("__clc_llvm_nvvm_tex_1d_v4i32_s32");
+int4 __nvvm_tex_2d_v4i32_i32(unsigned long, int,
+                             int) __asm("__clc_llvm_nvvm_tex_2d_v4i32_s32");
+int4 __nvvm_tex_3d_v4i32_i32(unsigned long, int, int,
+                             int) __asm("__clc_llvm_nvvm_tex_3d_v4i32_s32");
+
+// Unsigned int
+uint4 __nvvm_tex_1d_v4j32_i32(unsigned long,
+                              int) __asm("__clc_llvm_nvvm_tex_1d_v4j32_s32");
+uint4 __nvvm_tex_2d_v4j32_i32(unsigned long, int,
+                              int) __asm("__clc_llvm_nvvm_tex_2d_v4j32_s32");
+uint4 __nvvm_tex_3d_v4j32_i32(unsigned long, int, int,
+                              int) __asm("__clc_llvm_nvvm_tex_3d_v4j32_s32");
+
+// Float
+float4 __nvvm_tex_1d_v4f32_i32(unsigned long,
+                               int) __asm("__clc_llvm_nvvm_tex_1d_v4f32_s32");
+float4 __nvvm_tex_2d_v4f32_i32(unsigned long, int,
+                               int) __asm("__clc_llvm_nvvm_tex_2d_v4f32_s32");
+float4 __nvvm_tex_3d_v4f32_i32(unsigned long, int, int,
+                               int) __asm("__clc_llvm_nvvm_tex_3d_v4f32_s32");
+
+// Macro to generate texture vec4 fetches
+#define _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(                  \
+    elem_t, fetch_elem_t, dimension, vec_size, fetch_vec_size, coord_input,    \
+    coord_parameter)                                                           \
+  elem_t##4 __nvvm_tex_##dimension##d_##vec_size##_i32(                        \
+      unsigned long imageHandle, coord_input) {                                \
+    fetch_elem_t##4 a = __nvvm_tex_##dimension##d_##fetch_vec_size##_i32(      \
+        imageHandle, coord_parameter);                                         \
+    return cast_##fetch_elem_t##4_to_##elem_t##4(a);                           \
+  }
+
+// Macro to generate texture vec2 fetches
+#define _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(                  \
+    elem_t, fetch_elem_t, dimension, vec_size, fetch_vec_size, coord_input,    \
+    coord_parameter)                                                           \
+  elem_t##2 __nvvm_tex_##dimension##d_##vec_size##_i32(                        \
+      unsigned long imageHandle, coord_input) {                                \
+    fetch_elem_t##4 a = __nvvm_tex_##dimension##d_##fetch_vec_size##_i32(      \
+        imageHandle, coord_parameter);                                         \
+    return cast_##fetch_elem_t##4_to_##elem_t##2(a);                           \
+  }
+
+// Macro to generate texture singular data type fetches
+#define _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(                      \
+    elem_t, fetch_elem_t, dimension, vec_size, fetch_vec_size, coord_input,    \
+    coord_parameter)                                                           \
+  elem_t __nvvm_tex_##dimension##d_##vec_size##_i32(unsigned long imageHandle, \
+                                                    coord_input) {             \
+    return (elem_t)__nvvm_tex_##dimension##d_##fetch_vec_size##_i32(           \
+        imageHandle, coord_parameter)[0];                                      \
+  }
+
+#define COORD_INPUT_1D int x
+#define COORD_INPUT_2D int x, int y
+#define COORD_INPUT_3D int x, int y, int z
+
+#define COORD_PARAMS_1D x
+#define COORD_PARAMS_2D x, y
+#define COORD_PARAMS_3D x, y, z
+
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v4i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v4i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v4i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v4t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v4t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v4t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v4i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v4i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v4i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v4h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v4h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v4h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v4f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v4f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v4f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
+
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, v2f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, v2f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, v2f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, v2i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, v2i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, v2i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, v2j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, v2j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, v2j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v2i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v2i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v2i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v2t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v2t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v2t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v2i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v2i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v2i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v2h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v2h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v2h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v2f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v2f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v2f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
+
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
+
+
+#undef COORD_INPUT_1D
+#undef COORD_INPUT_2D
+#undef COORD_INPUT_3D
+
+#undef COORD_PARAMS_1D
+#undef COORD_PARAMS_2D
+#undef COORD_PARAMS_3D
+
+#undef _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN
+#undef _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN
+#undef _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN
+
+#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(                       \
+    elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input,   \
+    ...)                                                                       \
+  _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE(                                      \
+      18, __spirv_ImageFetch, I##elem_t_mangled,                               \
+      coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_input) {              \
+    return __nvvm_tex_##dimension##d_##vec_size##_i32(imageHandle,             \
+                                                      __VA_ARGS__);            \
+  }
+
+// Int
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 1, i, i32, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+
+// Unsigned int
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 1, j, j32, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 2, j, j32, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 3, j, j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+
+// Short
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 1, s, i16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+
+// Unsigned short
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 1, t, t16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+
+// Char
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 1, a, i8, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+
+// Unsigned Char
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 1, h, h8, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+
+// Float
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 1, f, f32, i, uint x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 2, f, f32, Dv2_i, uint2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 1, Dv2_f, v2f32, i, uint x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, S0_, uint2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 1, Dv4_f, v4f32, i, uint x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, uint2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, S0_, uint4 coord, coord.x, coord.y, coord.z)
+
+// Half
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 1, DF16_, f16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+
 
 // <--- MIPMAP --->
 
diff --git a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll
index fdc7833275234..c81de52c86ba1 100644
--- a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll
+++ b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll
@@ -264,7 +264,7 @@ entry:
   ret <4 x i16> %1
 }
 
-; <--- TEXTURES --->
+; <--- TEXTURE SAMPLING (floating-point coordinates) --->
 declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4s32.f32(i64, float)
 define <4 x i32> @__clc_llvm_nvvm_tex_1d_v4i32_f32(i64 %img, float %x) nounwind alwaysinline {
 entry:
@@ -337,6 +337,78 @@ entry:
   ret <4 x float> %1
 }
 
+; <--- TEXTURE FETCHING (integer coordinates) --->
+declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4s32.s32(i64, i32)
+define <4 x i32> @__clc_llvm_nvvm_tex_1d_v4i32_s32(i64 %img, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4s32.s32(i64 %img, i32 %x);
+  %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4s32.s32(i64, i32, i32)
+define <4 x i32> @__clc_llvm_nvvm_tex_2d_v4i32_s32(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4s32.s32(i64 %img, i32 %x, i32 %y);
+  %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4s32.s32(i64, i32, i32, i32)
+define <4 x i32> @__clc_llvm_nvvm_tex_3d_v4i32_s32(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4s32.s32(i64 %img, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4u32.s32(i64, i32)
+define <4 x i32> @__clc_llvm_nvvm_tex_1d_v4j32_s32(i64 %img, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.1d.v4u32.s32(i64 %img, i32 %x);
+  %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4u32.s32(i64, i32, i32)
+define <4 x i32> @__clc_llvm_nvvm_tex_2d_v4j32_s32(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.2d.v4u32.s32(i64 %img, i32 %x, i32 %y);
+  %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4u32.s32(i64, i32, i32, i32)
+define <4 x i32> @__clc_llvm_nvvm_tex_3d_v4j32_s32(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.3d.v4u32.s32(i64 %img, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {float,float,float,float} @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
+define <4 x float> @__clc_llvm_nvvm_tex_1d_v4f32_s32(i64 %img, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %x)
+  %1 = tail call <4 x float>@__clc_structf32_to_vector({float,float,float,float} %0)
+  ret <4 x float> %1
+}
+
+declare {float,float,float,float} @llvm.nvvm.tex.unified.2d.v4f32.s32(i64, i32, i32)
+define <4 x float> @__clc_llvm_nvvm_tex_2d_v4f32_s32(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %img, i32 %x, i32 %y);
+  %1 = tail call <4 x float>@__clc_structf32_to_vector({float,float,float,float} %0)
+  ret <4 x float> %1
+}
+
+declare {float,float,float,float} @llvm.nvvm.tex.unified.3d.v4f32.s32(i64, i32, i32, i32)
+define <4 x float> @__clc_llvm_nvvm_tex_3d_v4f32_s32(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.3d.v4f32.s32(i64 %img, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <4 x float>@__clc_structf32_to_vector({float,float,float,float} %0)
+  ret <4 x float> %1
+}
 
 
 ; <--- MIPMAP --->
diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
index ff06de04089c1..659385b37e39a 100644
--- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
+++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
@@ -61,6 +61,12 @@ def AspectExt_oneapi_interop_semaphore_export : Aspect<"ext_oneapi_interop_semap
 def AspectExt_oneapi_mipmap : Aspect<"ext_oneapi_mipmap">;
 def AspectExt_oneapi_mipmap_anisotropy : Aspect<"ext_oneapi_mipmap_anisotropy">;
 def AspectExt_oneapi_mipmap_level_reference : Aspect<"ext_oneapi_mipmap_level_reference">;
+def AspectExt_oneapi_bindless_sampled_image_fetch_1d_usm : Aspect<"ext_oneapi_bindless_sampled_image_fetch_1d_usm">;
+def AspectExt_oneapi_bindless_sampled_image_fetch_1d : Aspect<"ext_oneapi_bindless_sampled_image_fetch_1d">;
+def AspectExt_oneapi_bindless_sampled_image_fetch_2d_usm : Aspect<"ext_oneapi_bindless_sampled_image_fetch_2d_usm">;
+def AspectExt_oneapi_bindless_sampled_image_fetch_2d : Aspect<"ext_oneapi_bindless_sampled_image_fetch_2d">;
+def AspectExt_oneapi_bindless_sampled_image_fetch_3d_usm : Aspect<"ext_oneapi_bindless_sampled_image_fetch_3d_usm">;
+def AspectExt_oneapi_bindless_sampled_image_fetch_3d : Aspect<"ext_oneapi_bindless_sampled_image_fetch_3d">;
 def AspectExt_intel_esimd : Aspect<"ext_intel_esimd">;
 def AspectExt_oneapi_ballot_group : Aspect<"ext_oneapi_ballot_group">;
 def AspectExt_oneapi_fixed_size_group : Aspect<"ext_oneapi_fixed_size_group">;
@@ -114,7 +120,11 @@ def : TargetInfo<"__TestAspectList",
     AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm,
     AspectExt_oneapi_interop_memory_import, AspectExt_oneapi_interop_memory_export,
     AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_interop_semaphore_export,
-    AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_intel_esimd,
+    AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference,
+    AspectExt_oneapi_bindless_sampled_image_fetch_1d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_1d,
+    AspectExt_oneapi_bindless_sampled_image_fetch_2d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_2d,
+    AspectExt_oneapi_bindless_sampled_image_fetch_3d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_3d,
+    AspectExt_intel_esimd,
     AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group],
     []>;
 // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc
index d37670db4641c..5c05182228c6e 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc
@@ -983,8 +983,16 @@ void write_image(unsampled_image_handle &ImageHandle,
 Inside a kernel, it's possible to read an image via `read_image`, passing 
 the image handle. For the form that takes `unsampled_image_handle`, image data 
 will be fetched exactly as is in device memory. For the form that takes a 
-`sampled_image_handle`, the image will be sampled according to the 
+`sampled_image_handle`, the image data will either be sampled or fetched, 
+depending on the type of `CoordT` passed. If the coordinates are 
+floating-point, the image will be sampled according to the 
 `bindless_image_sampler` that was passed to the image upon construction.
+If the coordinates are integers, the image data will be fetched, and no 
+sampling operations will be performed, the `bindless_image_sampler` passed to 
+the image upon creation has no effect when fetching sampled image data with 
+integer coordinates. Note that not all devices may support fetching of sampled 
+image data depending on the dimension or backing memory type. We provide 
+device aspect queries for this in <<querying_sampled_image_fetch_support>>.
 
 The user is required to pass a `DataT` template parameter, which specifies the
 return type of the `read_image` function. If `DataT` is not a recognized 
@@ -1015,9 +1023,12 @@ For reading and writing of unsampled images, coordinates are specified by `int`,
 `sycl::vec<int, 2>`, and `sycl::vec<int, 4>` for 1D, 2D, and 3D images, 
 respectively.
 
-Sampled image reads take `float`, `sycl::vec<float, 2>`, and 
+Sampled image "sampled reads" take `float`, `sycl::vec<float, 2>`, and 
 `sycl::vec<float, 4>` coordinate types for 1D, 2D, and 3D images, respectively.
 
+Sampled image "fetch reads" take `int`, `sycl::vec<int, 2>`, and 
+`sycl::vec<int, 4>` coordinate types for 1D, 2D, and 3D images, respectively.
+
 Note that in the case of 3D reads or writes, coordinates for 3D images take a 
 vector of size 4, not 3, as the fourth element in the coordinate vector is 
 ignored.
@@ -1065,6 +1076,36 @@ When providing the above types as `DataT` parameters to an image read function,
 the corresponding `HintT` parameters to use would be `sycl::vec<float, 4>` and 
 `sycl::vec<short, 2>`, respectively.
 
+=== Querying sampled image fetch support [[querying_sampled_image_fetch_support]]
+
+We provide the following device queries to query support for sampled image 
+fetch with various backing memory types and dimensionalities.
+
+The device aspect descriptors for these queries are:
+
+[frame="none",options="header"]
+|======================
+|Device descriptor | Description
+|`aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm` | 
+ Indicates if the device is capable of fetching USM backed 1D 
+ sampled image data.
+|`aspect::ext_oneapi_bindless_sampled_image_fetch_1d` | 
+ Indicates if the device is capable of fetching non-USM backed 1D 
+ sampled image data.
+|`aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm` | 
+ Indicates if the device is capable of fetching USM backed 2D 
+ sampled image data.
+|`aspect::ext_oneapi_bindless_sampled_image_fetch_2d` | 
+ Indicates if the device is capable of fetching non-USM backed 2D 
+ sampled image data.
+|`aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm` | 
+ Indicates if the device is capable of fetching USM backed 3D 
+ sampled image data.
+|`aspect::ext_oneapi_bindless_sampled_image_fetch_3d` | 
+ Indicates if the device is capable of fetching non-USM backed 3D 
+ sampled image data.
+|======================
+
 == Mipmapped images
 
 So far, we have described how to create and operate on standard bindless images.
@@ -2060,4 +2101,6 @@ These features still need to be handled:
                    wording around what types are allowed to be read or written.
                  - Allow `read_image` and `read_mipmap` to return a 
                    user-defined type.
+|5.1|2024-01-16| - Allow fetching of sampled image data by passing integer 
+                   coordinates to `read_image`.
 |======================
diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp
index ea1a6580d30e6..36a74bc79fdd3 100644
--- a/sycl/include/CL/__spirv/spirv_ops.hpp
+++ b/sycl/include/CL/__spirv/spirv_ops.hpp
@@ -199,6 +199,9 @@ extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT);
 template <class RetT, typename ImageT, typename TempArgT>
 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT);
 
+template <class RetT, typename ImageT, typename TempArgT>
+extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT);
+
 template <typename ImageT, typename SampledType>
 extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT,
                                                               __ocl_sampler_t);
diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp
index 60db7ccb645c8..adcedc6722297 100644
--- a/sycl/include/sycl/detail/image_ocl_types.hpp
+++ b/sycl/include/sycl/detail/image_ocl_types.hpp
@@ -83,6 +83,18 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) {
   return sycl::detail::convertDataToType<TempRetT, RetType>(Ret);
 }
 
+template <typename RetType, typename ImageT, typename CoordT>
+static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) {
+
+  // Convert from sycl types to builtin types to get correct function mangling.
+  using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
+  using TempArgT = sycl::detail::ConvertToOpenCLType_t<CoordT>;
+
+  TempArgT Arg = sycl::detail::convertDataToType<CoordT, TempArgT>(Coords);
+  TempRetT Ret = __spirv_ImageFetch<TempRetT, ImageT, TempArgT>(Img, Arg);
+  return sycl::detail::convertDataToType<TempRetT, RetType>(Ret);
+}
+
 template <typename RetType, typename SmpImageT, typename CoordT>
 static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords,
                                       float Level) {
diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h
index 4d0da9995908a..339059cbd441f 100644
--- a/sycl/include/sycl/detail/pi.h
+++ b/sycl/include/sycl/detail/pi.h
@@ -149,9 +149,16 @@
 // 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones.
 // 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM
 // 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM
+// 14.43 Added device queries for sampled image fetch support
+//         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM
+//         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D
+//         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM
+//         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D
+//         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM
+//         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
 
 #define _PI_H_VERSION_MAJOR 14
-#define _PI_H_VERSION_MINOR 42
+#define _PI_H_VERSION_MINOR 43
 
 #define _PI_STRING_HELPER(a) #a
 #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -434,6 +441,13 @@ typedef enum {
   PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT = 0x2010F,
 
   PI_EXT_ONEAPI_DEVICE_INFO_MATRIX_COMBINATIONS = 0x20110,
+
+  PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM = 0x20111,
+  PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D = 0x20112,
+  PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM = 0x20113,
+  PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D = 0x20114,
+  PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM = 0x20115,
+  PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D = 0x20116,
 } _pi_device_info;
 
 typedef enum {
diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp
index 3f515ddc564af..1a801deacb3c5 100644
--- a/sycl/include/sycl/device_aspect_macros.hpp
+++ b/sycl/include/sycl/device_aspect_macros.hpp
@@ -273,28 +273,61 @@
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_mipmap_level_reference__ 0
 #endif
 
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ \
+  0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ 0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ \
+  0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ 0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ \
+  0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
+#endif
+
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__
-//__SYCL_ASPECT(ext_intel_esimd, 53)
+//__SYCL_ASPECT(ext_intel_esimd, 59)
 #define __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__
-// __SYCL_ASPECT(ext_oneapi_ballot_group, 54)
+// __SYCL_ASPECT(ext_oneapi_ballot_group, 60)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__
-// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
+// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__
-// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
+// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__
-// __SYCL_ASPECT(ext_oneapi_tangle_group, 57)
+// __SYCL_ASPECT(ext_oneapi_tangle_group, 63)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__ 0
 #endif
 
@@ -563,27 +596,57 @@
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_mipmap_level_reference__ 0
 #endif
 
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
+#endif
+
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__
-//__SYCL_ASPECT(ext_intel_esimd, 53)
+//__SYCL_ASPECT(ext_intel_esimd, 59)
 #define __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__
-// __SYCL_ASPECT(ext_oneapi_ballot_group, 54)
+// __SYCL_ASPECT(ext_oneapi_ballot_group, 60)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__
-// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
+// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__
-// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
+// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__
-// __SYCL_ASPECT(ext_oneapi_tangle_group, 57)
+// __SYCL_ASPECT(ext_oneapi_tangle_group, 63)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__ 0
 #endif
diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
index 847f53ea2547f..1351414ec0613 100644
--- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp
+++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
@@ -733,6 +733,28 @@ template <typename CoordT> constexpr void assert_unsampled_coords() {
   }
 }
 
+template <typename CoordT> constexpr bool are_floating_coords() {
+  if constexpr (is_vec_v<CoordT>) {
+    return std::is_same_v<typename CoordT::element_type, float>;
+  } else {
+    return std::is_same_v<CoordT, float>;
+  }
+}
+
+template <typename CoordT> constexpr bool are_integer_coords() {
+  if constexpr (is_vec_v<CoordT>) {
+    return std::is_same_v<typename CoordT::element_type, int>;
+  } else {
+    return std::is_same_v<CoordT, int>;
+  }
+}
+
+template <typename CoordT> constexpr void assert_coords_type() {
+  static_assert(are_floating_coords<CoordT>() || are_integer_coords<CoordT>(),
+                "Expected coordinates to be of `float` or `int` type, or "
+                "vectors of these types.");
+}
+
 // assert coords or elements of coords is of a float type
 template <typename CoordT> constexpr void assert_sampled_coords() {
   if constexpr (std::is_scalar_v<CoordT>) {
@@ -813,7 +835,8 @@ DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
  *           HintT should be a `sycl::vec` type, `sycl::half` type, or POD type.
  *           HintT must also have the same size as DataT.
  *  @tparam  CoordT The input coordinate type. e.g. float, float2, or float4 for
- *           1D, 2D, and 3D, respectively
+ *           1D, 2D, and 3D sampling, respectively. And int, int2, or int4 for
+ *           1D, 2D, and 3D fetching, respectively.
  *  @param   imageHandle The image handle
  *  @param   coords The coordinates at which to fetch image data
  *  @return  Sampled image data
@@ -827,23 +850,32 @@ DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
 template <typename DataT, typename HintT = DataT, typename CoordT>
 DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]],
                  const CoordT &coords [[maybe_unused]]) {
-  detail::assert_sampled_coords<CoordT>();
+  detail::assert_coords_type<CoordT>();
   constexpr size_t coordSize = detail::coord_size<CoordT>();
   static_assert(coordSize == 1 || coordSize == 2 || coordSize == 4,
                 "Expected input coordinate to be have 1, 2, or 4 components "
                 "for 1D, 2D and 3D images, respectively.");
+  static_assert(sizeof(HintT) == sizeof(DataT),
+                "When trying to read a user-defined type, HintT must be of "
+                "the same size as the user-defined DataT.");
+  static_assert(detail::is_recognized_standard_type<HintT>(),
+                "HintT must always be a recognized standard type");
 
 #ifdef __SYCL_DEVICE_ONLY__
-  if constexpr (detail::is_recognized_standard_type<DataT>()) {
-    return __invoke__ImageRead<DataT>(imageHandle.raw_handle, coords);
+  if constexpr (detail::are_floating_coords<CoordT>()) {
+    if constexpr (detail::is_recognized_standard_type<DataT>()) {
+      return __invoke__ImageRead<DataT>(imageHandle.raw_handle, coords);
+    } else {
+      return sycl::bit_cast<DataT>(
+          __invoke__ImageRead<HintT>(imageHandle.raw_handle, coords));
+    }
   } else {
-    static_assert(sizeof(HintT) == sizeof(DataT),
-                  "When trying to read a user-defined type, HintT must be of "
-                  "the same size as the user-defined DataT.");
-    static_assert(detail::is_recognized_standard_type<HintT>(),
-                  "HintT must always be a recognized standard type");
-    return sycl::bit_cast<DataT>(
-        __invoke__ImageRead<HintT>(imageHandle.raw_handle, coords));
+    if constexpr (detail::is_recognized_standard_type<DataT>()) {
+      return __invoke__ImageFetch<DataT>(imageHandle.raw_handle, coords);
+    } else {
+      return sycl::bit_cast<DataT>(
+          __invoke__ImageFetch<HintT>(imageHandle.raw_handle, coords));
+    }
   }
 #else
   assert(false); // Bindless images not yet implemented on host.
diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def
index 31ccd0d038c4a..b2c8607101524 100644
--- a/sycl/include/sycl/info/aspects.def
+++ b/sycl/include/sycl/info/aspects.def
@@ -47,8 +47,14 @@ __SYCL_ASPECT(ext_oneapi_interop_semaphore_export, 49)
 __SYCL_ASPECT(ext_oneapi_mipmap, 50)
 __SYCL_ASPECT(ext_oneapi_mipmap_anisotropy, 51)
 __SYCL_ASPECT(ext_oneapi_mipmap_level_reference, 52)
-__SYCL_ASPECT(ext_intel_esimd, 53)
-__SYCL_ASPECT(ext_oneapi_ballot_group, 54)
-__SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
-__SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
-__SYCL_ASPECT(ext_oneapi_tangle_group, 57)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58)
+__SYCL_ASPECT(ext_intel_esimd, 59)
+__SYCL_ASPECT(ext_oneapi_ballot_group, 60)
+__SYCL_ASPECT(ext_oneapi_fixed_size_group, 61)
+__SYCL_ASPECT(ext_oneapi_opportunistic_group, 62)
+__SYCL_ASPECT(ext_oneapi_tangle_group, 63)
diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 8d7f2f32b4158..7a6e9ac2c67eb 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -56,14 +56,20 @@ endif()
 if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
-  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 79c28d0f0713f58358d5080653d95803fd131749
-  # Merge: 25e0b603 45d76b78
-  # Author: aarongreig <aaron.greig@codeplay.com>
-  # Date: Fri Jan 12 16:14:44 2024 +0000
-  #     Merge pull request #1186 from hdelan/device-global-hip
-  #     [HIP] Add support for global variable read write
-  set(UNIFIED_RUNTIME_TAG 79c28d0f0713f58358d5080653d95803fd131749)
+  set(UNIFIED_RUNTIME_REPO "git@github.com:przemektmalon/unified-runtime.git")
+  # commit 16c6dc262451cf1b7dcb7d8021904a968c4a16ee
+  # Author: Przemek Malon <przemek.malon@codeplay.com>
+  # Date:   Wed Nov 29 11:25:34 2023 +0000
+  #   [Bindless][Exp] Add device queries for sampled image fetch  
+  #     Added the following queries for device capabilities of fetching sampled
+  #     images:
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
+  set(UNIFIED_RUNTIME_TAG aa442a391c79692af279aaeda74da63c9ba6a489)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp
index 7cdfc8ad7d30a..35a641a527d42 100644
--- a/sycl/plugins/unified_runtime/pi2ur.hpp
+++ b/sycl/plugins/unified_runtime/pi2ur.hpp
@@ -1256,6 +1256,24 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT,
         UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP)
+    PI_TO_UR_MAP_DEVICE_INFO(
+        PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM,
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM)
+    PI_TO_UR_MAP_DEVICE_INFO(
+        PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D,
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D)
+    PI_TO_UR_MAP_DEVICE_INFO(
+        PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM,
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM)
+    PI_TO_UR_MAP_DEVICE_INFO(
+        PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D,
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D)
+    PI_TO_UR_MAP_DEVICE_INFO(
+        PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM,
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM)
+    PI_TO_UR_MAP_DEVICE_INFO(
+        PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D,
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT,
         UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP)
diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp
index 5d5b98e4cf02b..b0e9b9fe05778 100644
--- a/sycl/source/detail/device_impl.cpp
+++ b/sycl/source/detail/device_impl.cpp
@@ -546,6 +546,57 @@ bool device_impl::has(aspect Aspect) const {
             sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
     return call_successful && support;
   }
+  case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: {
+    pi_bool support = PI_FALSE;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice,
+            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
+  }
+  case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: {
+    pi_bool support = PI_FALSE;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
+  }
+  case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: {
+    pi_bool support = PI_FALSE;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice,
+            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
+  }
+  case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: {
+    pi_bool support = PI_FALSE;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
+  }
+  case aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm: {
+    pi_bool support = PI_FALSE;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice,
+            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
+  }
+  case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: {
+    pi_bool support = PI_FALSE;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
+  }
   case aspect::ext_intel_esimd: {
     pi_bool support = PI_FALSE;
     bool call_successful =
diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp
index 931332b928cb4..353c4fe5c0472 100644
--- a/sycl/test-e2e/bindless_images/image_get_info.cpp
+++ b/sycl/test-e2e/bindless_images/image_get_info.cpp
@@ -55,8 +55,31 @@ int main() {
     std::cout << "bindless_images_support: " << bindlessSupport
               << "\nbindless_images_shared_usm_support: "
               << bindlessSharedUsmSupport
-              << "\nbindless_images_1d_usm_support: " 1dS
-              << "\nbindless_images_2d_usm_support: " << S << "\n";
+              << "\nbindless_images_1d_usm_support: " << usm1dSupport
+              << "\nbindless_images_2d_usm_support: " << usm2dSupport << "\n";
+#endif
+
+    // Extension: query for sampled image fetch capabilities
+    bool sampledFetch1DUSMSupport =
+        dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm);
+    bool sampledFetch2DUSMSupport =
+        dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm);
+    bool sampledFetch3DUSMSupport =
+        dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm);
+    bool sampledFetch1DSupport =
+        dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d);
+    bool sampledFetch2DSupport =
+        dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d);
+    bool sampledFetch3DSupport =
+        dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d);
+
+#ifdef VERBOSE_PRINT
+    std::cout << "sampledFetch1DUSMSupport: " << sampledFetch1DUSMSupport
+              << "\nsampledFetch2DUSMSupport: " << sampledFetch2DUSMSupport
+              << "\nsampledFetch3DUSMSupport: " << sampledFetch3DUSMSupport
+              << "\nsampledFetch1DSupport: " << sampledFetch1DSupport
+              << "\nsampledFetch2DSupport: " << sampledFetch2DSupport
+              << "\nsampledFetch3DSupport: " << sampledFetch3DSupport << "\n";
 #endif
 
     // Extension: get pitch alignment information from device -- device info
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
new file mode 100644
index 0000000000000..bacafd92fcf21
--- /dev/null
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
@@ -0,0 +1,104 @@
+// REQUIRES: linux
+// REQUIRES: cuda
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+// Uncomment to print additional test information
+// #define VERBOSE_PRINT
+
+class kernel_sampled_fetch;
+
+int main() {
+  sycl::device dev;
+  sycl::queue q(dev);
+  auto ctxt = q.get_context();
+
+  // Check if device supports 1D USM sampled image fetches
+  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm)) {
+#ifdef VERBOSE_PRINT
+    std::cout << "Test skipped due to lack of device support for fetching 1D "
+                 "USM backed sampled images\n";
+#endif
+    return 0;
+  }
+
+  // declare image data
+  constexpr size_t width = 16;
+  std::vector<float> out(width);
+  std::vector<float> expected(width);
+  std::vector<float> dataIn(width);
+  auto imgMem = sycl::malloc_shared<float>(width, q);
+  for (int i = 0; i < width; i++) {
+    expected[i] = i;
+    imgMem[i] = i;
+  }
+
+  namespace syclexp = sycl::ext::oneapi::experimental;
+
+  try {
+    // Extension: image descriptor
+    syclexp::image_descriptor desc({width}, sycl::image_channel_order::r,
+                                   sycl::image_channel_type::fp32);
+
+    syclexp::bindless_image_sampler samp(
+        sycl::addressing_mode::repeat,
+        sycl::coordinate_normalization_mode::normalized,
+        sycl::filtering_mode::linear);
+
+    auto imgHandle = syclexp::create_image(imgMem, 0, samp, desc, q);
+
+    sycl::buffer buf(out.data(), sycl::range{width});
+    q.submit([&](sycl::handler &cgh) {
+      auto outAcc = buf.get_access<sycl::access_mode::write>(cgh, width);
+
+      cgh.parallel_for<kernel_sampled_fetch>(width, [=](sycl::id<1> id) {
+        // Extension: fetch data from sampled image handle
+        float px1 = syclexp::read_image<float>(imgHandle, int(id[0]));
+
+        outAcc[id] = px1;
+      });
+    });
+
+    q.wait_and_throw();
+
+    // Extension: cleanup
+    syclexp::destroy_image_handle(imgHandle, dev, ctxt);
+    sycl::free(imgMem, ctxt);
+  } catch (sycl::exception e) {
+    std::cerr << "SYCL exception caught! : " << e.what() << "\n";
+    return 1;
+  } catch (...) {
+    std::cerr << "Unknown exception caught!\n";
+    return 2;
+  }
+
+  // collect and validate output
+  bool validated = true;
+  for (int i = 0; i < width; i++) {
+    bool mismatch = false;
+    if (out[i] != expected[i]) {
+      mismatch = true;
+      validated = false;
+    }
+
+    if (mismatch) {
+#ifdef VERBOSE_PRINT
+      std::cout << "Result mismatch! Expected: " << expected[i]
+                << ", Actual: " << out[i] << std::endl;
+#else
+      break;
+#endif
+    }
+  }
+  if (validated) {
+    std::cout << "Test passed!" << std::endl;
+    return 0;
+  }
+
+  std::cout << "Test failed!" << std::endl;
+  return 3;
+}
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
new file mode 100644
index 0000000000000..0c5108f5b03f4
--- /dev/null
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
@@ -0,0 +1,125 @@
+// REQUIRES: linux
+// REQUIRES: cuda
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+// Uncomment to print additional test information
+// #define VERBOSE_PRINT
+
+class kernel_sampled_fetch;
+
+int main() {
+
+  sycl::device dev;
+  sycl::queue q(dev);
+  auto ctxt = q.get_context();
+
+  // Check if device supports 2D non-USM sampled image fetches
+  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d)) {
+#ifdef VERBOSE_PRINT
+    std::cout << "Test skipped due to lack of device support for fetching 2D "
+                 "non-USM backed sampled images\n";
+#endif
+    return 0;
+  }
+
+  // declare image data
+  constexpr size_t width = 5;
+  constexpr size_t height = 6;
+  constexpr size_t N = width * height;
+  std::vector<float> out(N);
+  std::vector<float> expected(N);
+  std::vector<float> dataIn(N);
+  for (int i = 0; i < width; i++) {
+    for (int j = 0; j < height; j++) {
+      auto index = i + (width * j);
+      expected[index] = index;
+      dataIn[index] = index;
+    }
+  }
+
+  namespace syclexp = sycl::ext::oneapi::experimental;
+
+  try {
+    syclexp::bindless_image_sampler samp(
+        sycl::addressing_mode::repeat,
+        sycl::coordinate_normalization_mode::unnormalized,
+        sycl::filtering_mode::nearest);
+
+    // Extension: image descriptor
+    syclexp::image_descriptor desc({width, height},
+                                   sycl::image_channel_order::r,
+                                   sycl::image_channel_type::fp32);
+
+    // Extension: allocate memory on device
+    syclexp::image_mem imgMem(desc, dev, ctxt);
+
+    // Extension: copy over data to device for non-USM image
+    q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc);
+    q.wait_and_throw();
+
+    // Extension: create the images and return the handles
+    syclexp::sampled_image_handle imgHandle =
+        syclexp::create_image(imgMem, samp, desc, q);
+
+    sycl::buffer buf(out.data(), sycl::range{height, width});
+    q.submit([&](sycl::handler &cgh) {
+      auto outAcc = buf.get_access<sycl::access_mode::write>(
+          cgh, sycl::range<2>{height, width});
+
+      cgh.parallel_for<kernel_sampled_fetch>(
+          sycl::nd_range<2>{{width, height}, {width, height}},
+          [=](sycl::nd_item<2> it) {
+            size_t dim0 = it.get_local_id(0);
+            size_t dim1 = it.get_local_id(1);
+
+            // Extension: fetch data from sampled image handle
+            float px1 =
+                syclexp::read_image<float>(imgHandle, sycl::int2(dim0, dim1));
+
+            outAcc[sycl::id<2>{dim1, dim0}] = px1;
+          });
+    });
+
+    q.wait_and_throw();
+
+    // Extension: cleanup
+    syclexp::destroy_image_handle(imgHandle, dev, ctxt);
+  } catch (sycl::exception e) {
+    std::cerr << "SYCL exception caught! : " << e.what() << "\n";
+    return 1;
+  } catch (...) {
+    std::cerr << "Unknown exception caught!\n";
+    return 2;
+  }
+
+  // collect and validate output
+  bool validated = true;
+  for (int i = 0; i < N; i++) {
+    bool mismatch = false;
+    if (out[i] != expected[i]) {
+      mismatch = true;
+      validated = false;
+    }
+
+    if (mismatch) {
+#ifdef VERBOSE_PRINT
+      std::cout << "Result mismatch! Expected: " << expected[i]
+                << ", Actual: " << out[i] << std::endl;
+#else
+      break;
+#endif
+    }
+  }
+  if (validated) {
+    std::cout << "Test passed!" << std::endl;
+    return 0;
+  }
+
+  std::cout << "Test failed!" << std::endl;
+  return 3;
+}
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
new file mode 100644
index 0000000000000..8932560b95e91
--- /dev/null
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
@@ -0,0 +1,132 @@
+// REQUIRES: linux
+// REQUIRES: cuda
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+// Uncomment to print additional test information
+// #define VERBOSE_PRINT
+
+class kernel_sampled_fetch;
+
+int main() {
+
+  sycl::device dev;
+  sycl::queue q(dev);
+  auto ctxt = q.get_context();
+
+  // Check if device supports 2D USM sampled image fetches
+  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm)) {
+#ifdef VERBOSE_PRINT
+    std::cout << "Test skipped due to lack of device support for fetching 2D "
+                 "USM backed sampled images\n";
+#endif
+    return 0;
+  }
+
+  // declare image data
+  constexpr size_t width = 5;
+  constexpr size_t height = 6;
+  constexpr size_t N = width * height;
+  std::vector<sycl::vec<uint16_t, 4>> out(N);
+  std::vector<sycl::vec<uint16_t, 4>> expected(N);
+  std::vector<sycl::vec<uint16_t, 4>> dataIn(N);
+  for (int i = 0; i < width; i++) {
+    for (int j = 0; j < height; j++) {
+      auto index = i + (width * j);
+      expected[index] = {index, index, index, index};
+      dataIn[index] = {index, index, index, index};
+    }
+  }
+
+  namespace syclexp = sycl::ext::oneapi::experimental;
+
+  try {
+    syclexp::bindless_image_sampler samp(
+        sycl::addressing_mode::repeat,
+        sycl::coordinate_normalization_mode::normalized,
+        sycl::filtering_mode::linear);
+
+    // Extension: image descriptor
+    syclexp::image_descriptor desc({width, height},
+                                   sycl::image_channel_order::rgba,
+                                   sycl::image_channel_type::unsigned_int16);
+    size_t pitch = 0;
+
+    // Extension: returns the device pointer to USM allocated pitched memory
+    auto imgMem = syclexp::pitched_alloc_device(&pitch, desc, q);
+
+    if (imgMem == nullptr) {
+      std::cout << "Error allocating pitched image memory!" << std::endl;
+      return 1;
+    }
+
+    // Extension: copy over data to device for USM image
+    q.ext_oneapi_copy(dataIn.data(), imgMem, desc, pitch);
+    q.wait_and_throw();
+
+    // Extension: create the images and return the handles
+    syclexp::sampled_image_handle imgHandle =
+        syclexp::create_image(imgMem, pitch, samp, desc, q);
+
+    sycl::buffer buf(out.data(), sycl::range{height, width});
+    q.submit([&](sycl::handler &cgh) {
+      auto outAcc = buf.get_access<sycl::access_mode::write>(
+          cgh, sycl::range<2>{height, width});
+
+      cgh.parallel_for<kernel_sampled_fetch>(
+          sycl::nd_range<2>{{width, height}, {width, height}},
+          [=](sycl::nd_item<2> it) {
+            size_t dim0 = it.get_local_id(0);
+            size_t dim1 = it.get_local_id(1);
+
+            // Extension: fetch data from sampled image handle
+            auto px1 = syclexp::read_image<sycl::vec<uint16_t, 4>>(
+                imgHandle, sycl::int2(dim0, dim1));
+
+            outAcc[sycl::id<2>{dim1, dim0}] = px1;
+          });
+    });
+
+    q.wait_and_throw();
+
+    // Extension: cleanup
+    syclexp::destroy_image_handle(imgHandle, dev, ctxt);
+    sycl::free(imgMem, ctxt);
+  } catch (sycl::exception e) {
+    std::cerr << "SYCL exception caught! : " << e.what() << "\n";
+    return 1;
+  } catch (...) {
+    std::cerr << "Unknown exception caught!\n";
+    return 2;
+  }
+
+  // collect and validate output
+  bool validated = true;
+  for (int i = 0; i < N; i++) {
+    bool mismatch = false;
+    if (out[i][0] != expected[i][0]) {
+      mismatch = true;
+      validated = false;
+    }
+
+    if (mismatch) {
+#ifdef VERBOSE_PRINT
+      std::cout << "Result mismatch! Expected: " << expected[i][0]
+                << ", Actual: " << out[i][0] << std::endl;
+#else
+      break;
+#endif
+    }
+  }
+  if (validated) {
+    std::cout << "Test passed!" << std::endl;
+    return 0;
+  }
+
+  std::cout << "Test failed!" << std::endl;
+  return 3;
+}
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
new file mode 100644
index 0000000000000..823eb8b7cc9b1
--- /dev/null
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
@@ -0,0 +1,129 @@
+// REQUIRES: linux
+// REQUIRES: cuda
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+// Uncomment to print additional test information
+// #define VERBOSE_PRINT
+
+class kernel_sampled_fetch;
+
+int main() {
+
+  sycl::device dev;
+  sycl::queue q(dev);
+  auto ctxt = q.get_context();
+
+  // Check if device supports 3D non-USM sampled image fetches
+  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d)) {
+#ifdef VERBOSE_PRINT
+    std::cout << "Test skipped due to lack of device support for fetching 3D "
+                 "non-USM backed sampled images\n";
+#endif
+    return 0;
+  }
+
+  // declare image data
+  constexpr size_t width = 4;
+  constexpr size_t height = 6;
+  constexpr size_t depth = 8;
+  constexpr size_t N = width * height * depth;
+  std::vector<float> out(N);
+  std::vector<float> expected(N);
+  std::vector<float> dataIn(N);
+  for (int i = 0; i < width; i++) {
+    for (int j = 0; j < height; j++) {
+      for (int k = 0; k < depth; k++) {
+        auto index = i + width * (j + height * k);
+        expected[index] = index;
+        dataIn[index] = index;
+      }
+    }
+  }
+
+  namespace syclexp = sycl::ext::oneapi::experimental;
+
+  try {
+    // Extension: image descriptor
+    syclexp::image_descriptor desc({width, height, depth},
+                                   sycl::image_channel_order::r,
+                                   sycl::image_channel_type::fp32);
+
+    syclexp::bindless_image_sampler samp(
+        sycl::addressing_mode::none,
+        sycl::coordinate_normalization_mode::unnormalized,
+        sycl::filtering_mode::nearest);
+
+    // Extension: allocate memory on device
+    syclexp::image_mem imgMem(desc, dev, ctxt);
+
+    // Extension: copy over data to device
+    q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc);
+    q.wait_and_throw();
+
+    // Extension: create the image and return the handle
+    syclexp::sampled_image_handle imgHandle =
+        syclexp::create_image(imgMem, samp, desc, dev, ctxt);
+
+    sycl::buffer buf(out.data(), sycl::range{depth, height, width});
+    q.submit([&](sycl::handler &cgh) {
+      auto outAcc = buf.get_access<sycl::access_mode::write>(
+          cgh, sycl::range<3>{depth, height, width});
+
+      cgh.parallel_for<kernel_sampled_fetch>(
+          sycl::nd_range<3>{{width, height, depth}, {width, height, depth}},
+          [=](sycl::nd_item<3> it) {
+            size_t dim0 = it.get_local_id(0);
+            size_t dim1 = it.get_local_id(1);
+            size_t dim2 = it.get_local_id(2);
+
+            // Extension: fetch data from sampled image handle
+            float px1 = syclexp::read_image<float>(
+                imgHandle, sycl::vec<int, 4>(dim0, dim1, dim2, 0));
+
+            outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1;
+          });
+    });
+
+    q.wait_and_throw();
+
+    // Extension: cleanup
+    syclexp::destroy_image_handle(imgHandle, dev, ctxt);
+  } catch (sycl::exception e) {
+    std::cerr << "SYCL exception caught! : " << e.what() << "\n";
+    return 1;
+  } catch (...) {
+    std::cerr << "Unknown exception caught!\n";
+    return 2;
+  }
+
+  // collect and validate output
+  bool validated = true;
+  for (int i = 0; i < N; i++) {
+    bool mismatch = false;
+    if (out[i] != expected[i]) {
+      mismatch = true;
+      validated = false;
+    }
+
+    if (mismatch) {
+#ifdef VERBOSE_PRINT
+      std::cout << "Result mismatch! Expected: " << expected[i]
+                << ", Actual: " << out[i] << std::endl;
+#else
+      break;
+#endif
+    }
+  }
+  if (validated) {
+    std::cout << "Test passed!" << std::endl;
+    return 0;
+  }
+
+  std::cout << "Test failed!" << std::endl;
+  return 3;
+}
diff --git a/sycl/test-e2e/bindless_images/sampling_1D.cpp b/sycl/test-e2e/bindless_images/sampling_1D.cpp
index 554ba98e13333..b363fd77ec604 100644
--- a/sycl/test-e2e/bindless_images/sampling_1D.cpp
+++ b/sycl/test-e2e/bindless_images/sampling_1D.cpp
@@ -112,6 +112,6 @@ int main() {
     return 0;
   }
 
-  std::cout << "Test passed!" << std::endl;
+  std::cout << "Test failed!" << std::endl;
   return 3;
 }

From 8c1e5427bd0f8d288922c5c50d0ce5cc43b3dc25 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Fri, 19 Jan 2024 14:23:34 +0000
Subject: [PATCH 02/34] Change UR repo link in CMakeLists to HTTPS

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 7a6e9ac2c67eb..1b7c3ca9385b7 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -56,7 +56,7 @@ endif()
 if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
-  set(UNIFIED_RUNTIME_REPO "git@github.com:przemektmalon/unified-runtime.git")
+  set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
   # commit 16c6dc262451cf1b7dcb7d8021904a968c4a16ee
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000

From 992d8890ff0ccf10e0d67025c2fa2d5c7038214d Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Mon, 22 Jan 2024 09:54:24 +0000
Subject: [PATCH 03/34] Update UR TAG in CMakeLists

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 85e7f3aa0296c..9d93e72862af7 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 16c6dc262451cf1b7dcb7d8021904a968c4a16ee
+  # commit 9713f7d8469600dcc4b220b4f1d1f01ea909068d
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
-  set(UNIFIED_RUNTIME_TAG aa442a391c79692af279aaeda74da63c9ba6a489)
+  set(UNIFIED_RUNTIME_TAG 9713f7d8469600dcc4b220b4f1d1f01ea909068d)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")

From 25752ac4c2862b570e0553934158a3b17a9d7c52 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 23 Jan 2024 13:07:59 +0000
Subject: [PATCH 04/34] Update UR enums with _EXP suffix

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 16 ++++++++--------
 sycl/plugins/unified_runtime/pi2ur.hpp      | 12 ++++++------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 9d93e72862af7..4095e698afa40 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,19 +57,19 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 9713f7d8469600dcc4b220b4f1d1f01ea909068d
+  # commit c9f5374d9e991068431d20456e214f595d7920a5
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
   #     Added the following queries for device capabilities of fetching sampled
   #     images:
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
-  set(UNIFIED_RUNTIME_TAG 9713f7d8469600dcc4b220b4f1d1f01ea909068d)
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
+  set(UNIFIED_RUNTIME_TAG c9f5374d9e991068431d20456e214f595d7920a5)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp
index 35a641a527d42..a603e96c32f5c 100644
--- a/sycl/plugins/unified_runtime/pi2ur.hpp
+++ b/sycl/plugins/unified_runtime/pi2ur.hpp
@@ -1258,22 +1258,22 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
         UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM,
-        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM)
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D,
-        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D)
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM,
-        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM)
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D,
-        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D)
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM,
-        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM)
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D,
-        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D)
+        UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP)
     PI_TO_UR_MAP_DEVICE_INFO(
         PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT,
         UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP)

From 9439bdd3524277946de080c9b359935810e77796 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 6 Feb 2024 09:16:02 +0000
Subject: [PATCH 05/34] Update UR TAG in CMakeLists

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 4095e698afa40..527b129d5db39 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit c9f5374d9e991068431d20456e214f595d7920a5
+  # commit 6ddda43379f9c226a5cf04f41dfbb923c5ff831f
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG c9f5374d9e991068431d20456e214f595d7920a5)
+  set(UNIFIED_RUNTIME_TAG 6ddda43379f9c226a5cf04f41dfbb923c5ff831f)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")

From 73614cbdd0a0d2f1da389f958ee69ed44a772c66 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 6 Feb 2024 10:45:04 +0000
Subject: [PATCH 06/34] Update CMakeLists UR TAG

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index d3d275d461fe4..db0c01f3067f0 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 6ddda43379f9c226a5cf04f41dfbb923c5ff831f
+  # commit 098b31066bb30b5f5978d3e95e4e4070ef11bfe4
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 6ddda43379f9c226a5cf04f41dfbb923c5ff831f)
+  set(UNIFIED_RUNTIME_TAG 098b31066bb30b5f5978d3e95e4e4070ef11bfe4)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")

From 17ba91f2619b06fd748f112d59d6574aa3298952 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Mon, 12 Feb 2024 16:34:16 +0000
Subject: [PATCH 07/34] Apply libclc macro changes to reduce LOC

---
 libclc/ptx-nvidiacl/libspirv/images/image.cl  | 108 +++++++-----------
 .../sampled_fetch/fetch_3D.cpp                |   4 +-
 2 files changed, 44 insertions(+), 68 deletions(-)

diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl
index 67553b03c2f84..20135c3ea7e70 100644
--- a/libclc/ptx-nvidiacl/libspirv/images/image.cl
+++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl
@@ -2338,72 +2338,44 @@ float4 __nvvm_tex_3d_v4f32_i32(unsigned long, int, int,
 #define COORD_PARAMS_2D x, y
 #define COORD_PARAMS_3D x, y, z
 
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v4i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v4i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v4i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v4t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v4t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v4t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v4i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v4i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v4i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v4h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v4h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v4h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v4f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v4f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v4f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
-
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, v2f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, v2f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, v2f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, v2i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, v2i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, v2i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, v2j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, v2j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, v2j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, v2i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, v2i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, v2i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, v2t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, v2t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, v2t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, v2i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, v2i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, v2i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, v2h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, v2h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, v2h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, v2f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, v2f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, v2f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
-
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 1, f32, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 2, f32, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(float, float, 3, f32, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 1, i32, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 2, i32, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(int, int, 3, i32, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 1, j32, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 2, j32, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uint, uint, 3, j32, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 1, i16, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 2, i16, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(short, int, 3, i16, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 1, t16, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 2, t16, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(ushort, uint, 3, t16, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 1, i8, v4i32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 2, i8, v4i32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(char, int, 3, i8, v4i32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 1, h8, v4j32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 2, h8, v4j32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(uchar, uint, 3, h8, v4j32, COORD_INPUT_3D, COORD_PARAMS_3D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 1, f16, v4f32, COORD_INPUT_1D, COORD_PARAMS_1D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 2, f16, v4f32, COORD_INPUT_2D, COORD_PARAMS_2D)
-_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, f16, v4f32, COORD_INPUT_3D, COORD_PARAMS_3D)
-
+#define _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(elem_t, fetch_elem_t, vec_size, fetch_vec_size) \
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 1, vec_size, fetch_vec_size, COORD_INPUT_1D, COORD_PARAMS_1D) \
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 2, vec_size, fetch_vec_size, COORD_INPUT_2D, COORD_PARAMS_2D) \
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 3, vec_size, fetch_vec_size, COORD_INPUT_3D, COORD_PARAMS_3D)
+
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(short, int, v4i16, v4i32)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(ushort, uint, v4t16, v4j32)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(char, int, v4i8, v4i32)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uchar, uint, v4h8, v4j32)
+_CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, v4f16, v4f32)
+
+#define _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(elem_t, fetch_elem_t, vec_size, fetch_vec_size) \
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 1, vec_size, fetch_vec_size, COORD_INPUT_1D, COORD_PARAMS_1D) \
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 2, vec_size, fetch_vec_size, COORD_INPUT_2D, COORD_PARAMS_2D) \
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 3, vec_size, fetch_vec_size, COORD_INPUT_3D, COORD_PARAMS_3D)
+
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(float, float, v2f32, v4f32)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(int, int, v2i32, v4i32)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uint, uint, v2j32, v4j32)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(short, int, v2i16, v4i32)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(ushort, uint, v2t16, v4j32)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(char, int, v2i8, v4i32)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uchar, uint, v2h8, v4j32)
+_CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, v2f16, v4f32)
+
+#define _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(elem_t, fetch_elem_t, vec_size, fetch_vec_size) \
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 1, vec_size, fetch_vec_size, COORD_INPUT_1D, COORD_PARAMS_1D) \
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 2, vec_size, fetch_vec_size, COORD_INPUT_2D, COORD_PARAMS_2D) \
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(elem_t, fetch_elem_t, 3, vec_size, fetch_vec_size, COORD_INPUT_3D, COORD_PARAMS_3D)
+
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(float, float, f32, v4f32)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(int, int, i32, v4i32)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uint, uint, j32, v4j32)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(short, int, i16, v4i32)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(ushort, uint, t16, v4j32)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(char, int, i8, v4i32)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(uchar, uint, h8, v4j32)
+_CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, f16, v4f32)
 
 #undef COORD_INPUT_1D
 #undef COORD_INPUT_2D
@@ -2417,6 +2389,10 @@ _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN(half, float, 3, f16, v4f32, COO
 #undef _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN
 #undef _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN
 
+#undef _CLC_DEFINE_BINDLESS_VEC4THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS
+#undef _CLC_DEFINE_BINDLESS_VEC2THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS
+#undef _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS
+
 #define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(                       \
     elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input,   \
     ...)                                                                       \
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
index 823eb8b7cc9b1..185d2b31b1515 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
@@ -8,7 +8,7 @@
 #include <sycl/sycl.hpp>
 
 // Uncomment to print additional test information
-// #define VERBOSE_PRINT
+#define VERBOSE_PRINT
 
 class kernel_sampled_fetch;
 
@@ -54,7 +54,7 @@ int main() {
                                    sycl::image_channel_type::fp32);
 
     syclexp::bindless_image_sampler samp(
-        sycl::addressing_mode::none,
+        sycl::addressing_mode::repeat,
         sycl::coordinate_normalization_mode::unnormalized,
         sycl::filtering_mode::nearest);
 

From a38a8a9ccda4a60ef3e701c005555db31432fc56 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Mon, 12 Feb 2024 16:45:35 +0000
Subject: [PATCH 08/34] Update UR repo TAG

---
 sycl/plugins/unified_runtime/CMakeLists.txt              | 4 ++--
 sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index db0c01f3067f0..a0e4f2cb66d4b 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 098b31066bb30b5f5978d3e95e4e4070ef11bfe4
+  # commit 18f3039ce5ba360a3ed00e614203bb4eda960e06
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 098b31066bb30b5f5978d3e95e4e4070ef11bfe4)
+  set(UNIFIED_RUNTIME_TAG 18f3039ce5ba360a3ed00e614203bb4eda960e06)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
index 185d2b31b1515..2cec4ab976a98 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
@@ -8,7 +8,7 @@
 #include <sycl/sycl.hpp>
 
 // Uncomment to print additional test information
-#define VERBOSE_PRINT
+// #define VERBOSE_PRINT
 
 class kernel_sampled_fetch;
 

From 73c665a474bdf5efbbcd96e740a0dd43c8f0a5a8 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 22 Feb 2024 13:20:25 +0000
Subject: [PATCH 09/34] Fix deprecated function calls. Update UR tag & fetch
 tests

---
 sycl/include/sycl/detail/image_ocl_types.hpp  |  19 +-
 .../sycl/ext/oneapi/bindless_images.hpp       | 182 +++++++++---------
 sycl/plugins/unified_runtime/CMakeLists.txt   |   4 +-
 .../sampled_fetch/fetch_1D_USM.cpp            |   2 +-
 .../sampled_fetch/fetch_2D.cpp                |   2 +-
 .../sampled_fetch/fetch_2D_USM.cpp            |   2 +-
 .../sampled_fetch/fetch_3D.cpp                |   4 +-
 7 files changed, 116 insertions(+), 99 deletions(-)

diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp
index 96b53b04f782c..2a10a722c565a 100644
--- a/sycl/include/sycl/detail/image_ocl_types.hpp
+++ b/sycl/include/sycl/detail/image_ocl_types.hpp
@@ -84,13 +84,24 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) {
 
   // Convert from sycl types to builtin types to get correct function mangling.
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
-  using TempArgT = sycl::detail::ConvertToOpenCLType_t<CoordT>;
+  auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-  TempArgT Arg = sycl::detail::convertDataToType<CoordT, TempArgT>(Coords);
-  TempRetT Ret = __spirv_ImageFetch<TempRetT, ImageT, TempArgT>(Img, Arg);
-  return sycl::detail::convertDataToType<TempRetT, RetType>(Ret);
+  return sycl::detail::convertFromOpenCLTypeFor<RetType>(
+      __spirv_ImageFetch<TempRetT, ImageT, decltype(TmpCoords)>(Img, TmpCoords));
 }
 
+// template <typename RetType, typename ImageT, typename CoordT>
+// static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) {
+
+//   // Convert from sycl types to builtin types to get correct function mangling.
+//   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
+//   using TempArgT = sycl::detail::ConvertToOpenCLType_t<CoordT>;
+
+//   TempArgT Arg = sycl::detail::convertDataToType<CoordT, TempArgT>(Coords);
+//   TempRetT Ret = __spirv_ImageFetch<TempRetT, ImageT, TempArgT>(Img, Arg);
+//   return sycl::detail::convertDataToType<TempRetT, RetType>(Ret);
+// }
+
 template <typename RetType, typename SmpImageT, typename CoordT>
 static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords,
                                       float Level) {
diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
index 18e6ffeac317c..b859e8a38c89e 100644
--- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp
+++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
@@ -792,34 +792,6 @@ template <typename DataT> constexpr bool is_recognized_standard_type() {
 
 } // namespace detail
 
-/**
- *  @brief   [Deprecated] Read an unsampled image using its handle
- *
- *  @tparam  DataT The return type
- *  @tparam  HintT A hint type that can be used to select for a specialized
- *           backend intrinsic when a user-defined type is passed as `DataT`.
- *           HintT should be a `sycl::vec` type, `sycl::half` type, or POD type.
- *           HintT must also have the same size as DataT.
- *  @tparam  CoordT The input coordinate type. e.g. int, int2, or int3 for
- *           1D, 2D, and 3D, respectively
- *  @param   imageHandle The image handle
- *  @param   coords The coordinates at which to fetch image data
- *  @return  Image data
- *
- *  __NVPTX__: Name mangling info
- *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized)
- *             The name mangling should therefore not interfere with one
- *             another
- */
-template <typename DataT, typename HintT = DataT, typename CoordT>
-__SYCL_DEPRECATED("read_image for standard unsampled images is deprecated. "
-                  "Instead use fetch_image.")
-DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
-                 const CoordT &coords [[maybe_unused]]) {
-  return fetch_image(imageHandle, coords);
-}
-
 /**
  *  @brief   Fetch data from an unsampled image using its handle
  *
@@ -867,18 +839,18 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
 }
 
 /**
- *  @brief   [Deprecated] Read a sampled image using its handle
+ *  @brief   [Deprecated] Read an unsampled image using its handle
  *
  *  @tparam  DataT The return type
  *  @tparam  HintT A hint type that can be used to select for a specialized
  *           backend intrinsic when a user-defined type is passed as `DataT`.
  *           HintT should be a `sycl::vec` type, `sycl::half` type, or POD type.
  *           HintT must also have the same size as DataT.
- *  @tparam  CoordT The input coordinate type. e.g. float, float2, or float3 for
+ *  @tparam  CoordT The input coordinate type. e.g. int, int2, or int3 for
  *           1D, 2D, and 3D, respectively
  *  @param   imageHandle The image handle
- *  @param   coords The coordinates at which to sample image data
- *  @return  Sampled image data
+ *  @param   coords The coordinates at which to fetch image data
+ *  @return  Image data
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
@@ -887,11 +859,11 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
  *             another
  */
 template <typename DataT, typename HintT = DataT, typename CoordT>
-__SYCL_DEPRECATED("read_image for standard sampled images is deprecated. "
-                  "Instead use sample_image or fetch_image.")
-DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]],
+__SYCL_DEPRECATED("read_image for standard unsampled images is deprecated. "
+                  "Instead use fetch_image.")
+DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
                  const CoordT &coords [[maybe_unused]]) {
-  return sample_image(imageHandle, coords);
+  return fetch_image<DataT>(imageHandle, coords);
 }
 
 /**
@@ -930,12 +902,12 @@ DataT fetch_image(const sampled_image_handle &imageHandle [[maybe_unused]],
                 "HintT must always be a recognized standard type");
 
 #ifdef __SYCL_DEVICE_ONLY__
-    if constexpr (detail::is_recognized_standard_type<DataT>()) {
-      return __invoke__ImageFetch<DataT>(imageHandle.raw_handle, coords);
-    } else {
-      return sycl::bit_cast<DataT>(
-          __invoke__ImageFetch<HintT>(imageHandle.raw_handle, coords));
-    }
+  if constexpr (detail::is_recognized_standard_type<DataT>()) {
+    return __invoke__ImageRead<DataT>(imageHandle.raw_handle, coords);
+  } else {
+    return sycl::bit_cast<DataT>(
+        __invoke__ImageRead<HintT>(imageHandle.raw_handle, coords));
+  }
 #else
   assert(false); // Bindless images not yet implemented on host.
 #endif
@@ -977,20 +949,19 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]],
                 "HintT must always be a recognized standard type");
 
 #ifdef __SYCL_DEVICE_ONLY__
-    if constexpr (detail::is_recognized_standard_type<DataT>()) {
-      return __invoke__ImageRead<DataT>(imageHandle.raw_handle, coords);
-    } else {
-      return sycl::bit_cast<DataT>(
-          __invoke__ImageRead<HintT>(imageHandle.raw_handle, coords));
-    }
+  if constexpr (detail::is_recognized_standard_type<DataT>()) {
+    return __invoke__ImageRead<DataT>(imageHandle.raw_handle, coords);
+  } else {
+    return sycl::bit_cast<DataT>(
+        __invoke__ImageRead<HintT>(imageHandle.raw_handle, coords));
+  }
 #else
   assert(false); // Bindless images not yet implemented on host.
 #endif
 }
 
 /**
- *  @brief   [Deprecated] Read a mipmap image using its handle with LOD
- *           filtering
+ *  @brief   [Deprecated] Read a sampled image using its handle
  *
  *  @tparam  DataT The return type
  *  @tparam  HintT A hint type that can be used to select for a specialized
@@ -999,45 +970,28 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]],
  *           HintT must also have the same size as DataT.
  *  @tparam  CoordT The input coordinate type. e.g. float, float2, or float3 for
  *           1D, 2D, and 3D, respectively
- *  @param   imageHandle The mipmap image handle
- *  @param   coords The coordinates at which to sample mipmap image data
- *  @param   level The mipmap level at which to sample
- *  @return  Mipmap image data with LOD filtering
- */
-template <typename DataT, typename HintT = DataT, typename CoordT>
-__SYCL_DEPRECATED("read_mipmap has been deprecated. "
-                  "Instead use sample_mipmap.")
-DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]],
-                  const CoordT &coords [[maybe_unused]],
-                  const float level [[maybe_unused]]) {
-  return sample_mipmap(imageHandle, coords, level);
-}
-
-/**
- *  @brief   [Deprecated] Read a mipmap image using its handle with anisotropic
- *           filtering
+ *  @param   imageHandle The image handle
+ *  @param   coords The coordinates at which to sample image data
+ *  @return  Sampled image data
  *
- *  @tparam  DataT The return type
- *  @tparam  HintT A hint type that can be used to select for a specialized
- *           backend intrinsic when a user-defined type is passed as `DataT`.
- *           HintT should be a `sycl::vec` type, `sycl::half` type, or POD type.
- *           HintT must also have the same size as DataT.
- *  @tparam  CoordT The input coordinate type. e.g. float, float2, or float3 for
- *           1D, 2D, and 3D, respectively
- *  @param   imageHandle The mipmap image handle
- *  @param   coords The coordinates at which to sample mipmap image data
- *  @param   dX Screen space gradient in the x dimension
- *  @param   dY Screen space gradient in the y dimension
- *  @return  Mipmap image data with anisotropic filtering
+ *  __NVPTX__: Name mangling info
+ *             Cuda surfaces require integer coords (by bytes)
+ *             Cuda textures require float coords (by element or normalized)
+ *             The name mangling should therefore not interfere with one
+ *             another
  */
 template <typename DataT, typename HintT = DataT, typename CoordT>
-__SYCL_DEPRECATED("read_mipmap has been deprecated. "
-                  "Instead use sample_mipmap.")
-DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]],
-                  const CoordT &coords [[maybe_unused]],
-                  const CoordT &dX [[maybe_unused]],
-                  const CoordT &dY [[maybe_unused]]) {
-  return sample_mipmap(imageHandle, coords, dX, dY);
+__SYCL_DEPRECATED("read_image for standard sampled images is deprecated. "
+                  "Instead use sample_image with floating point coordinates or "
+                  "fetch_image with integer coordinates.")
+DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]],
+                 const CoordT &coords [[maybe_unused]]) {
+  detail::assert_coords_type<CoordT>();
+  if constexpr (detail::are_floating_coords<CoordT>()) {
+    return sample_image<DataT>(imageHandle, coords);
+  } else if constexpr (detail::are_integer_coords<CoordT>()) {
+    return fetch_image<DataT>(imageHandle, coords);
+  }
 }
 
 /**
@@ -1127,6 +1081,58 @@ DataT sample_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]],
 #endif
 }
 
+/**
+ *  @brief   [Deprecated] Read a mipmap image using its handle with LOD
+ *           filtering
+ *
+ *  @tparam  DataT The return type
+ *  @tparam  HintT A hint type that can be used to select for a specialized
+ *           backend intrinsic when a user-defined type is passed as `DataT`.
+ *           HintT should be a `sycl::vec` type, `sycl::half` type, or POD type.
+ *           HintT must also have the same size as DataT.
+ *  @tparam  CoordT The input coordinate type. e.g. float, float2, or float3 for
+ *           1D, 2D, and 3D, respectively
+ *  @param   imageHandle The mipmap image handle
+ *  @param   coords The coordinates at which to sample mipmap image data
+ *  @param   level The mipmap level at which to sample
+ *  @return  Mipmap image data with LOD filtering
+ */
+template <typename DataT, typename HintT = DataT, typename CoordT>
+__SYCL_DEPRECATED("read_mipmap has been deprecated. "
+                  "Instead use sample_mipmap.")
+DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]],
+                  const CoordT &coords [[maybe_unused]],
+                  const float level [[maybe_unused]]) {
+  return sample_mipmap<DataT>(imageHandle, coords, level);
+}
+
+/**
+ *  @brief   [Deprecated] Read a mipmap image using its handle with anisotropic
+ *           filtering
+ *
+ *  @tparam  DataT The return type
+ *  @tparam  HintT A hint type that can be used to select for a specialized
+ *           backend intrinsic when a user-defined type is passed as `DataT`.
+ *           HintT should be a `sycl::vec` type, `sycl::half` type, or POD type.
+ *           HintT must also have the same size as DataT.
+ *  @tparam  CoordT The input coordinate type. e.g. float, float2, or float3 for
+ *           1D, 2D, and 3D, respectively
+ *  @param   imageHandle The mipmap image handle
+ *  @param   coords The coordinates at which to sample mipmap image data
+ *  @param   dX Screen space gradient in the x dimension
+ *  @param   dY Screen space gradient in the y dimension
+ *  @return  Mipmap image data with anisotropic filtering
+ */
+template <typename DataT, typename HintT = DataT, typename CoordT>
+__SYCL_DEPRECATED("read_mipmap has been deprecated. "
+                  "Instead use sample_mipmap.")
+DataT read_mipmap(const sampled_image_handle &imageHandle [[maybe_unused]],
+                  const CoordT &coords [[maybe_unused]],
+                  const CoordT &dX [[maybe_unused]],
+                  const CoordT &dY [[maybe_unused]]) {
+  return sample_mipmap<DataT>(imageHandle, coords, dX, dY);
+}
+
 /**
  *  @brief   [Deprecated] Read a mipmap image using its handle with LOD
  *           filtering
@@ -1149,7 +1155,7 @@ __SYCL_DEPRECATED("read_image for mipmaps is deprecated. "
 DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]],
                  const CoordT &coords [[maybe_unused]],
                  const float level [[maybe_unused]]) {
-  return sample_mipmap(imageHandle, coords, level);
+  return sample_mipmap<DataT>(imageHandle, coords, level);
 }
 
 /**
@@ -1176,7 +1182,7 @@ DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]],
                  const CoordT &coords [[maybe_unused]],
                  const CoordT &dX [[maybe_unused]],
                  const CoordT &dY [[maybe_unused]]) {
-  return sample_mipmap(imageHandle, coords, dX, dY);
+  return sample_mipmap<DataT>(imageHandle, coords, dX, dY);
 }
 
 /**
diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index c6f82486de2eb..4c88b3a971704 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 18f3039ce5ba360a3ed00e614203bb4eda960e06
+  # commit 747dad97f8da54c9c2c0bd255c20d6329a7e996f
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 18f3039ce5ba360a3ed00e614203bb4eda960e06)
+  set(UNIFIED_RUNTIME_TAG 747dad97f8da54c9c2c0bd255c20d6329a7e996f)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
index bacafd92fcf21..557cbc7e9f243 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
@@ -57,7 +57,7 @@ int main() {
 
       cgh.parallel_for<kernel_sampled_fetch>(width, [=](sycl::id<1> id) {
         // Extension: fetch data from sampled image handle
-        float px1 = syclexp::read_image<float>(imgHandle, int(id[0]));
+        float px1 = syclexp::fetch_image<float>(imgHandle, int(id[0]));
 
         outAcc[id] = px1;
       });
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
index 0c5108f5b03f4..3af92e0c00935 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
@@ -79,7 +79,7 @@ int main() {
 
             // Extension: fetch data from sampled image handle
             float px1 =
-                syclexp::read_image<float>(imgHandle, sycl::int2(dim0, dim1));
+                syclexp::fetch_image<float>(imgHandle, sycl::int2(dim0, dim1));
 
             outAcc[sycl::id<2>{dim1, dim0}] = px1;
           });
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
index 8932560b95e91..12608e3908b0a 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
@@ -84,7 +84,7 @@ int main() {
             size_t dim1 = it.get_local_id(1);
 
             // Extension: fetch data from sampled image handle
-            auto px1 = syclexp::read_image<sycl::vec<uint16_t, 4>>(
+            auto px1 = syclexp::fetch_image<sycl::vec<uint16_t, 4>>(
                 imgHandle, sycl::int2(dim0, dim1));
 
             outAcc[sycl::id<2>{dim1, dim0}] = px1;
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
index 2cec4ab976a98..12227c05407e4 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
@@ -82,8 +82,8 @@ int main() {
             size_t dim2 = it.get_local_id(2);
 
             // Extension: fetch data from sampled image handle
-            float px1 = syclexp::read_image<float>(
-                imgHandle, sycl::vec<int, 4>(dim0, dim1, dim2, 0));
+            float px1 = syclexp::fetch_image<float>(
+                imgHandle, sycl::vec<int, 3>(dim0, dim1, dim2));
 
             outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1;
           });

From e1e22ba54dedb4351ca366ff4994795beb8d816f Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 22 Feb 2024 14:59:49 +0000
Subject: [PATCH 10/34] Make fetch operation naming consistent across the stack
 (API -> libclc)

---
 libclc/ptx-nvidiacl/libspirv/images/image.cl  | 206 +++++++++---------
 sycl/include/CL/__spirv/spirv_ops.hpp         |   3 +
 sycl/include/sycl/detail/image_ocl_types.hpp  |  21 +-
 .../sycl/ext/oneapi/bindless_images.hpp       |  17 +-
 4 files changed, 127 insertions(+), 120 deletions(-)

diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl
index 0fbb2ef686bf5..feb7671813d80 100644
--- a/libclc/ptx-nvidiacl/libspirv/images/image.cl
+++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl
@@ -1637,11 +1637,11 @@ void __nvvm_sust_3d_v4f16_clamp_s(unsigned long imageHandle, int x, int y,
                                       as_short(b), as_short(c), as_short(d));
 }
 
-#define _CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(                               \
+#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(                              \
     elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input,   \
     ...)                                                                       \
   _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE(                                      \
-      17, __spirv_ImageRead, I##elem_t_mangled,                                \
+      18, __spirv_ImageFetch, I##elem_t_mangled,                               \
       coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_input) {              \
     return __nvvm_suld_##dimension##d_##vec_size##_clamp_s(imageHandle,        \
                                                            __VA_ARGS__);       \
@@ -1656,94 +1656,94 @@ void __nvvm_sust_3d_v4f16_clamp_s(unsigned long imageHandle, int x, int y,
     __nvvm_sust_##dimension##d_##vec_size##_clamp_s(imageHandle, __VA_ARGS__); \
   }
 
-// READS
+// Fetching unsampled images
 // Int
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int, 1, i, i32, i, int x, x * sizeof(int))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x * sizeof(int), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int, 3, i, i32, Dv3_i, int3 coord, coord.x * sizeof(int), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x * sizeof(int2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int2, 2, Dv2_i, v2i32, S0_, int2 coord, coord.x * sizeof(int2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int2, 3, Dv2_i, v2i32, Dv3_i, int3 coord, coord.x * sizeof(int2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x * sizeof(int4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x * sizeof(int4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(int4, 3, Dv4_i, v4i32, Dv3_i, int3 coord, coord.x * sizeof(int4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int, 1, i, i32, i, int x, x * sizeof(int))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x * sizeof(int), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv3_i, int3 coord, coord.x * sizeof(int), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x * sizeof(int2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, S0_, int2 coord, coord.x * sizeof(int2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv3_i, int3 coord, coord.x * sizeof(int2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x * sizeof(int4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x * sizeof(int4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv3_i, int3 coord, coord.x * sizeof(int4), coord.y, coord.z)
 
 // Unsigned Int
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(unsigned int, 1, j, j32, i, int x, x * sizeof(unsigned int))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(unsigned int, 2, j, j32, Dv2_i, int2 coord, coord.x * sizeof(unsigned int), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(unsigned int, 3, j, j32, Dv3_i, int3 coord, coord.x * sizeof(unsigned int), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x * sizeof(uint2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x * sizeof(uint2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv3_i, int3 coord, coord.x * sizeof(uint2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x * sizeof(uint4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x * sizeof(uint4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv3_i, int3 coord, coord.x * sizeof(uint4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(unsigned int, 1, j, j32, i, int x, x * sizeof(unsigned int))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(unsigned int, 2, j, j32, Dv2_i, int2 coord, coord.x * sizeof(unsigned int), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(unsigned int, 3, j, j32, Dv3_i, int3 coord, coord.x * sizeof(unsigned int), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x * sizeof(uint2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x * sizeof(uint2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv3_i, int3 coord, coord.x * sizeof(uint2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x * sizeof(uint4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x * sizeof(uint4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv3_i, int3 coord, coord.x * sizeof(uint4), coord.y, coord.z)
 
 // Short
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short, 1, s, i16, i, int x, x * sizeof(short))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x * sizeof(short), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short, 3, s, i16, Dv3_i, int3 coord, coord.x * sizeof(short), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x * sizeof(short2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x * sizeof(short2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short2, 3, Dv2_s, v2i16, Dv3_i, int3 coord, coord.x * sizeof(short2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x * sizeof(short4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x * sizeof(short4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(short4, 3, Dv4_s, v4i16, Dv3_i, int3 coord, coord.x * sizeof(short4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short, 1, s, i16, i, int x, x * sizeof(short))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x * sizeof(short), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv3_i, int3 coord, coord.x * sizeof(short), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x * sizeof(short2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x * sizeof(short2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv3_i, int3 coord, coord.x * sizeof(short2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x * sizeof(short4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x * sizeof(short4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv3_i, int3 coord, coord.x * sizeof(short4), coord.y, coord.z)
 
 // Unsigned Short
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort, 1, t, t16, i, int x, x * sizeof(ushort))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x * sizeof(ushort), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort, 3, t, t16, Dv3_i, int3 coord, coord.x * sizeof(ushort), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x * sizeof(ushort2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x * sizeof(ushort2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv3_i, int3 coord, coord.x * sizeof(ushort2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x * sizeof(ushort4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x * sizeof(ushort4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv3_i, int3 coord, coord.x * sizeof(ushort4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort, 1, t, t16, i, int x, x * sizeof(ushort))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x * sizeof(ushort), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv3_i, int3 coord, coord.x * sizeof(ushort), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x * sizeof(ushort2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x * sizeof(ushort2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv3_i, int3 coord, coord.x * sizeof(ushort2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x * sizeof(ushort4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x * sizeof(ushort4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv3_i, int3 coord, coord.x * sizeof(ushort4), coord.y, coord.z)
 
 // Char
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char, 1, a, i8, i, int x, x * sizeof(char))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x * sizeof(char), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char, 3, a, i8, Dv3_i, int3 coord, coord.x * sizeof(char), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x * sizeof(char2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x * sizeof(char2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char2, 3, Dv2_a, v2i8, Dv3_i, int3 coord, coord.x * sizeof(char2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x * sizeof(char4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x * sizeof(char4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(char4, 3, Dv4_a, v4i8, Dv3_i, int3 coord, coord.x * sizeof(char4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char, 1, a, i8, i, int x, x * sizeof(char))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x * sizeof(char), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv3_i, int3 coord, coord.x * sizeof(char), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x * sizeof(char2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x * sizeof(char2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv3_i, int3 coord, coord.x * sizeof(char2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x * sizeof(char4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x * sizeof(char4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv3_i, int3 coord, coord.x * sizeof(char4), coord.y, coord.z)
 
 // Unsigned Char
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar, 1, h, h8, i, int x, x * sizeof(uchar))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x * sizeof(uchar), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar, 3, h, h8, Dv3_i, int3 coord, coord.x * sizeof(uchar), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x * sizeof(uchar2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x * sizeof(uchar2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv3_i, int3 coord, coord.x * sizeof(uchar2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x * sizeof(uchar4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x * sizeof(uchar4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv3_i, int3 coord, coord.x * sizeof(uchar4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar, 1, h, h8, i, int x, x * sizeof(uchar))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x * sizeof(uchar), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv3_i, int3 coord, coord.x * sizeof(uchar), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x * sizeof(uchar2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x * sizeof(uchar2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv3_i, int3 coord, coord.x * sizeof(uchar2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x * sizeof(uchar4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x * sizeof(uchar4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv3_i, int3 coord, coord.x * sizeof(uchar4), coord.y, coord.z)
 
 // Float
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float, 1, f, f32, i, int x, x * sizeof(float))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float, 2, f, f32, Dv2_i, int2 coord, coord.x * sizeof(float), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float, 3, f, f32, Dv3_i, int3 coord, coord.x * sizeof(float), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float2, 1, Dv2_f, v2f32, i, int x, x * sizeof(float2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float2, 2, Dv2_f, v2f32, Dv2_i, int2 coord, coord.x * sizeof(float2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float2, 3, Dv2_f, v2f32, Dv3_i, int3 coord, coord.x * sizeof(float2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float4, 1, Dv4_f, v4f32, i, int x, x * sizeof(float4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, int2 coord, coord.x * sizeof(float4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(float4, 3, Dv4_f, v4f32, Dv3_i, int3 coord, coord.x * sizeof(float4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float, 1, f, f32, i, int x, x * sizeof(float))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float, 2, f, f32, Dv2_i, int2 coord, coord.x * sizeof(float), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv3_i, int3 coord, coord.x * sizeof(float), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float2, 1, Dv2_f, v2f32, i, int x, x * sizeof(float2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, Dv2_i, int2 coord, coord.x * sizeof(float2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv3_i, int3 coord, coord.x * sizeof(float2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float4, 1, Dv4_f, v4f32, i, int x, x * sizeof(float4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, int2 coord, coord.x * sizeof(float4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, Dv3_i, int3 coord, coord.x * sizeof(float4), coord.y, coord.z)
 
 // Half
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half, 1, DF16_, f16, i, int x, x * sizeof(half))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x * sizeof(half), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half, 3, DF16_, f16, Dv3_i, int3 coord, coord.x * sizeof(half), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x * sizeof(half2))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x * sizeof(half2), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv3_i, int3 coord, coord.x * sizeof(half2), coord.y, coord.z)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x * sizeof(half4))
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x * sizeof(half4), coord.y)
-_CLC_DEFINE_IMAGE_BINDLESS_READ_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv3_i, int3 coord, coord.x * sizeof(half4), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half, 1, DF16_, f16, i, int x, x * sizeof(half))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x * sizeof(half), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv3_i, int3 coord, coord.x * sizeof(half), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x * sizeof(half2))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x * sizeof(half2), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv3_i, int3 coord, coord.x * sizeof(half2), coord.y, coord.z)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x * sizeof(half4))
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x * sizeof(half4), coord.y)
+_CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv3_i, int3 coord, coord.x * sizeof(half4), coord.y, coord.z)
 
 // WRITES
 // Int
@@ -2397,100 +2397,100 @@ _CLC_DEFINE_BINDLESS_THUNK_TEXTURE_FETCH_BUILTIN_ALL_DIMS(half, float, f16, v4f3
     elem_t, dimension, elem_t_mangled, vec_size, coord_mangled, coord_input,   \
     ...)                                                                       \
   _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE(                                      \
-      18, __spirv_ImageFetch, I##elem_t_mangled,                               \
+      25, __spirv_SampledImageFetch, I##elem_t_mangled,                        \
       coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_input) {              \
     return __nvvm_tex_##dimension##d_##vec_size##_i32(imageHandle,             \
                                                       __VA_ARGS__);            \
   }
 
+
 // Int
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 1, i, i32, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 2, i, i32, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int, 3, i, i32, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 1, Dv2_i, v2i32, i, int x, x)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 2, Dv2_i, v2i32, S0_, int2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int2, 3, Dv2_i, v2i32, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 1, Dv4_i, v4i32, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 2, Dv4_i, v4i32, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(int4, 3, Dv4_i, v4i32, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 
 // Unsigned int
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 1, j, j32, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 2, j, j32, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 3, j, j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint, 3, j, j32, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 1, Dv2_j, v2j32, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 2, Dv2_j, v2j32, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint2, 3, Dv2_j, v2j32, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 1, Dv4_j, v4j32, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 2, Dv4_j, v4j32, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uint4, 3, Dv4_j, v4j32, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 
 // Short
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 1, s, i16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 2, s, i16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short, 3, s, i16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 1, Dv2_s, v2i16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 2, Dv2_s, v2i16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short2, 3, Dv2_s, v2i16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 1, Dv4_s, v4i16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 2, Dv4_s, v4i16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(short4, 3, Dv4_s, v4i16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 
 // Unsigned short
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 1, t, t16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 2, t, t16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort, 3, t, t16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 1, Dv2_t, v2t16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 2, Dv2_t, v2t16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort2, 3, Dv2_t, v2t16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 1, Dv4_t, v4t16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 2, Dv4_t, v4t16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(ushort4, 3, Dv4_t, v4t16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 
 // Char
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 1, a, i8, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 2, a, i8, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char, 3, a, i8, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 1, Dv2_a, v2i8, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 2, Dv2_a, v2i8, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char2, 3, Dv2_a, v2i8, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 1, Dv4_a, v4i8, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 2, Dv4_a, v4i8, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(char4, 3, Dv4_a, v4i8, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 
 // Unsigned Char
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 1, h, h8, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 2, h, h8, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar, 3, h, h8, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 1, Dv2_h, v2h8, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 2, Dv2_h, v2h8, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar2, 3, Dv2_h, v2h8, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 1, Dv4_h, v4h8, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 2, Dv4_h, v4h8, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(uchar4, 3, Dv4_h, v4h8, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 
 // Float
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 1, f, f32, i, uint x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 2, f, f32, Dv2_i, uint2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float, 3, f, f32, Dv3_i, uint4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 1, Dv2_f, v2f32, i, uint x, x)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, S0_, uint2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv4_i, uint4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 2, Dv2_f, v2f32, Dv2_i, uint2 coord, coord.x, coord.y)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float2, 3, Dv2_f, v2f32, Dv3_i, uint4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 1, Dv4_f, v4f32, i, uint x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 2, Dv4_f, v4f32, Dv2_i, uint2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, S0_, uint4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(float4, 3, Dv4_f, v4f32, Dv3_i, uint4 coord, coord.x, coord.y, coord.z)
 
 // Half
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 1, DF16_, f16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 2, DF16_, f16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half, 3, DF16_, f16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 1, Dv2_DF16_, v2f16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 2, Dv2_DF16_, v2f16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half2, 3, Dv2_DF16_, v2f16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 1, Dv4_DF16_, v4f16, i, int x, x)
 _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 2, Dv4_DF16_, v4f16, Dv2_i, int2 coord, coord.x, coord.y)
-_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_i, int4 coord, coord.x, coord.y, coord.z)
-
+_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_FETCH_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv3_i, int4 coord, coord.x, coord.y, coord.z)
 
 // <--- MIPMAP --->
 
diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp
index 8a0e6866351f9..a1bc9e407db8f 100644
--- a/sycl/include/CL/__spirv/spirv_ops.hpp
+++ b/sycl/include/CL/__spirv/spirv_ops.hpp
@@ -203,6 +203,9 @@ extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT);
 template <class RetT, typename ImageT, typename TempArgT>
 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT);
 
+template <class RetT, typename ImageT, typename TempArgT>
+extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageFetch(ImageT, TempArgT);
+
 template <typename ImageT, typename SampledType>
 extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT,
                                                               __ocl_sampler_t);
diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp
index 2a10a722c565a..8b825b5432e89 100644
--- a/sycl/include/sycl/detail/image_ocl_types.hpp
+++ b/sycl/include/sycl/detail/image_ocl_types.hpp
@@ -87,20 +87,21 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) {
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageFetch<TempRetT, ImageT, decltype(TmpCoords)>(Img, TmpCoords));
+      __spirv_ImageFetch<TempRetT, ImageT, decltype(TmpCoords)>(Img,
+                                                                TmpCoords));
 }
 
-// template <typename RetType, typename ImageT, typename CoordT>
-// static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) {
+template <typename RetType, typename ImageT, typename CoordT>
+static RetType __invoke__SampledImageFetch(ImageT Img, CoordT Coords) {
 
-//   // Convert from sycl types to builtin types to get correct function mangling.
-//   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
-//   using TempArgT = sycl::detail::ConvertToOpenCLType_t<CoordT>;
+  // Convert from sycl types to builtin types to get correct function mangling.
+  using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
+  auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-//   TempArgT Arg = sycl::detail::convertDataToType<CoordT, TempArgT>(Coords);
-//   TempRetT Ret = __spirv_ImageFetch<TempRetT, ImageT, TempArgT>(Img, Arg);
-//   return sycl::detail::convertDataToType<TempRetT, RetType>(Ret);
-// }
+  return sycl::detail::convertFromOpenCLTypeFor<RetType>(
+      __spirv_SampledImageFetch<TempRetT, ImageT, decltype(TmpCoords)>(
+          Img, TmpCoords));
+}
 
 template <typename RetType, typename SmpImageT, typename CoordT>
 static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords,
diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
index b859e8a38c89e..01126b12d8ac5 100644
--- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp
+++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
@@ -808,7 +808,8 @@ template <typename DataT> constexpr bool is_recognized_standard_type() {
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized)
+ *             Cuda textures require float coords (by element or normalized) 
+ *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
  */
@@ -823,7 +824,7 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
 
 #ifdef __SYCL_DEVICE_ONLY__
   if constexpr (detail::is_recognized_standard_type<DataT>()) {
-    return __invoke__ImageRead<DataT>(imageHandle.raw_handle, coords);
+    return __invoke__ImageFetch<DataT>(imageHandle.raw_handle, coords);
   } else {
     static_assert(sizeof(HintT) == sizeof(DataT),
                   "When trying to read a user-defined type, HintT must be of "
@@ -831,7 +832,7 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
     static_assert(detail::is_recognized_standard_type<HintT>(),
                   "HintT must always be a recognized standard type");
     return sycl::bit_cast<DataT>(
-        __invoke__ImageRead<HintT>(imageHandle.raw_handle, coords));
+        __invoke__ImageFetch<HintT>(imageHandle.raw_handle, coords));
   }
 #else
   assert(false); // Bindless images not yet implemented on host
@@ -854,7 +855,8 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized)
+ *             Cuda textures require float coords (by element or normalized) 
+ *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
  */
@@ -903,10 +905,10 @@ DataT fetch_image(const sampled_image_handle &imageHandle [[maybe_unused]],
 
 #ifdef __SYCL_DEVICE_ONLY__
   if constexpr (detail::is_recognized_standard_type<DataT>()) {
-    return __invoke__ImageRead<DataT>(imageHandle.raw_handle, coords);
+    return __invoke__SampledImageFetch<DataT>(imageHandle.raw_handle, coords);
   } else {
     return sycl::bit_cast<DataT>(
-        __invoke__ImageRead<HintT>(imageHandle.raw_handle, coords));
+        __invoke__SampledImageFetch<HintT>(imageHandle.raw_handle, coords));
   }
 #else
   assert(false); // Bindless images not yet implemented on host.
@@ -976,7 +978,8 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]],
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized)
+ *             Cuda textures require float coords (by element or normalized) 
+ *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
  */

From 74324c63e9ecf0c9c6d008788395b9bf6d6a4d01 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 22 Feb 2024 15:22:23 +0000
Subject: [PATCH 11/34] Update UR repo TAG

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 4c88b3a971704..3c30af782b54d 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 747dad97f8da54c9c2c0bd255c20d6329a7e996f
+  # commit b386db5da1786da14dbcd5def02a381ebb098f15
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 747dad97f8da54c9c2c0bd255c20d6329a7e996f)
+  set(UNIFIED_RUNTIME_TAG b386db5da1786da14dbcd5def02a381ebb098f15)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")

From 0cf37c2d28fdfef6afec25a07d4c941cc189cb80 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 22 Feb 2024 15:40:56 +0000
Subject: [PATCH 12/34] Add missing `fetch_image` API in specification document

---
 .../experimental/sycl_ext_oneapi_bindless_images.asciidoc  | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc
index 3aa647222ffe9..ebf554d9ab446 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc
@@ -988,6 +988,10 @@ namespace sycl::ext::oneapi::experimental {
 template <typename DataT, typename HintT = DataT, typename CoordT>
 DataT fetch_image(const unsampled_image_handle &ImageHandle,
                   const CoordT &Coords);
+
+template <typename DataT, typename HintT = DataT, typename CoordT>
+DataT fetch_image(const sampled_image_handle &ImageHandle,
+                  const CoordT &Coords);
 template <typename DataT, typename HintT = DataT, typename CoordT>
 DataT sample_image(const sampled_image_handle &ImageHandle, 
                    const CoordT &Coords);
@@ -1009,7 +1013,8 @@ When fetching from a sampled image handle, data exatly as is in memory, no
 sampling operations will be performed, and the `bindless_image_sampler` passed 
 to the image upon creation has no effect on the returned image data. Note that 
 not all devices may support fetching of sampled image data depending on the 
-dimension or backing memory type. We provide device aspect queries for this in <<querying_sampled_image_fetch_support>>.
+dimension or backing memory type. We provide device aspect queries for this in 
+<<querying_sampled_image_fetch_support>>.
 
 The user is required to pass a `DataT` template parameter, which specifies the
 return type of the `fetch_image` and `sample_image` functions. If `DataT` is 

From 72cb5a3c4c9528cb59376e64f51ae110b79b2ef6 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 22 Feb 2024 15:51:51 +0000
Subject: [PATCH 13/34] Fix PI versions in pi.h

---
 sycl/include/sycl/detail/pi.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h
index c74adff94baa2..4380ae5c7468c 100644
--- a/sycl/include/sycl/detail/pi.h
+++ b/sycl/include/sycl/detail/pi.h
@@ -152,15 +152,15 @@
 // 15.43 Changed the signature of piextMemGetNativeHandle to also take a
 // pi_device
 // 15.44 Add coarse-grain memory advice flag for HIP.
-// 15.45 Added device queries for sampled image fetch support
+// 15.45 Added piextKernelSuggestMaxCooperativeGroupCount and
+//       piextEnqueueCooperativeKernelLaunch.
+// 15.46 Added device queries for sampled image fetch support
 //         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM
 //         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D
 //         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM
 //         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D
 //         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM
 //         - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
-// 15.46 Added piextKernelSuggestMaxCooperativeGroupCount and
-//       piextEnqueueCooperativeKernelLaunch.
 
 #define _PI_H_VERSION_MAJOR 15
 #define _PI_H_VERSION_MINOR 46

From e7cdd4340ec29ba96f4828808361c9dddcae1c77 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 22 Feb 2024 16:20:24 +0000
Subject: [PATCH 14/34] Fix non-e2e bindless images test

---
 sycl/test/extensions/bindless_images.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sycl/test/extensions/bindless_images.cpp b/sycl/test/extensions/bindless_images.cpp
index 0f7f72e60771b..eece72c070930 100644
--- a/sycl/test/extensions/bindless_images.cpp
+++ b/sycl/test/extensions/bindless_images.cpp
@@ -4,7 +4,7 @@
 #include <sycl/sycl.hpp>
 
 // CHECK: spir_kernel void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperI10image_readEE
-// CHECK: tail call spir_func noundef <4 x float> @_Z17__spirv_ImageReadIDv4
+// CHECK: tail call spir_func noundef <4 x float> @_Z18__spirv_ImageFetchIDv4
 using namespace sycl::ext::oneapi::experimental;
 class image_read;
 int main() {

From 2cb042c71d68a833271e5f30086924639dc5cf43 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Mon, 4 Mar 2024 14:39:34 +0000
Subject: [PATCH 15/34] Revert existing aspect macros to original values

---
 sycl/include/sycl/device_aspect_macros.hpp | 158 ++++++++++-----------
 sycl/include/sycl/info/aspects.def         |  28 ++--
 2 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp
index b62f498b23ad3..0bd137e228a01 100644
--- a/sycl/include/sycl/device_aspect_macros.hpp
+++ b/sycl/include/sycl/device_aspect_macros.hpp
@@ -273,79 +273,79 @@
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_mipmap_level_reference__ 0
 #endif
 
-#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53)
-#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ \
-  0
-#endif
-
-#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54)
-#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ 0
-#endif
-
-#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55)
-#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ \
-  0
-#endif
-
-#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56)
-#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ 0
-#endif
-
-#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57)
-#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ \
-  0
-#endif
-
-#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58)
-#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
-#endif
-
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__
-//__SYCL_ASPECT(ext_intel_esimd, 59)
+//__SYCL_ASPECT(ext_intel_esimd, 53)
 #define __SYCL_ALL_DEVICES_HAVE_ext_intel_esimd__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__
-// __SYCL_ASPECT(ext_oneapi_ballot_group, 60)
+// __SYCL_ASPECT(ext_oneapi_ballot_group, 54)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_ballot_group__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__
-// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61)
+// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_fixed_size_group__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__
-// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62)
+// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_opportunistic_group__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__
-// __SYCL_ASPECT(ext_oneapi_tangle_group, 63)
+// __SYCL_ASPECT(ext_oneapi_tangle_group, 57)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_tangle_group__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_matrix__
-// __SYCL_ASPECT(ext_intel_matrix, 64)
+// __SYCL_ASPECT(ext_intel_matrix, 58)
 #define __SYCL_ALL_DEVICES_HAVE_ext_intel_matrix__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_composite__
-// __SYCL_ASPECT(ext_oneapi_is_composite, 65)
+// __SYCL_ASPECT(ext_oneapi_is_composite, 59)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_composite__ 0
 #endif
 
 #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_component__
-// __SYCL_ASPECT(ext_oneapi_is_component, 66)
+// __SYCL_ASPECT(ext_oneapi_is_component, 60)
 #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_component__ 0
 #endif
 
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 61)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ \
+  0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 62)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_1d__ 0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 63)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ \
+  0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 64)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_2d__ 0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 65)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ \
+  0
+#endif
+
+#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 66)
+#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
+#endif
+
 #ifndef __SYCL_ANY_DEVICE_HAS_host__
 // __SYCL_ASPECT(host, 0)
 #define __SYCL_ANY_DEVICE_HAS_host__ 0
@@ -611,72 +611,72 @@
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_mipmap_level_reference__ 0
 #endif
 
-#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53)
-#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ 0
-#endif
-
-#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54)
-#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ 0
-#endif
-
-#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55)
-#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ 0
-#endif
-
-#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56)
-#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ 0
-#endif
-
-#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57)
-#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ 0
-#endif
-
-#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__
-//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58)
-#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
-#endif
-
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__
-//__SYCL_ASPECT(ext_intel_esimd, 59)
+//__SYCL_ASPECT(ext_intel_esimd, 53)
 #define __SYCL_ANY_DEVICE_HAS_ext_intel_esimd__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__
-// __SYCL_ASPECT(ext_oneapi_ballot_group, 60)
+// __SYCL_ASPECT(ext_oneapi_ballot_group, 54)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_ballot_group__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__
-// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 61)
+// __SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_fixed_size_group__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__
-// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 62)
+// __SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_opportunistic_group__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__
-// __SYCL_ASPECT(ext_oneapi_tangle_group, 63)
+// __SYCL_ASPECT(ext_oneapi_tangle_group, 57)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_tangle_group__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_matrix__
-// __SYCL_ASPECT(ext_intel_matrix, 64)
+// __SYCL_ASPECT(ext_intel_matrix, 58)
 #define __SYCL_ANY_DEVICE_HAS_ext_intel_matrix__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_composite__
-// __SYCL_ASPECT(ext_oneapi_is_composite, 65)
+// __SYCL_ASPECT(ext_oneapi_is_composite, 59)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_composite__ 0
 #endif
 
 #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_component__
-// __SYCL_ASPECT(ext_oneapi_is_component, 66)
+// __SYCL_ASPECT(ext_oneapi_is_component, 60)
 #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_component__ 0
 #endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 61)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d_usm__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 62)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_1d__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 63)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d_usm__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 64)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_2d__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 65)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d_usm__ 0
+#endif
+
+#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__
+//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 66)
+#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
+#endif
diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def
index 739698d371fbf..c1df9c53abd5a 100644
--- a/sycl/include/sycl/info/aspects.def
+++ b/sycl/include/sycl/info/aspects.def
@@ -47,17 +47,17 @@ __SYCL_ASPECT(ext_oneapi_interop_semaphore_export, 49)
 __SYCL_ASPECT(ext_oneapi_mipmap, 50)
 __SYCL_ASPECT(ext_oneapi_mipmap_anisotropy, 51)
 __SYCL_ASPECT(ext_oneapi_mipmap_level_reference, 52)
-__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 53)
-__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 54)
-__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 55)
-__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 56)
-__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 57)
-__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 58)
-__SYCL_ASPECT(ext_intel_esimd, 59)
-__SYCL_ASPECT(ext_oneapi_ballot_group, 60)
-__SYCL_ASPECT(ext_oneapi_fixed_size_group, 61)
-__SYCL_ASPECT(ext_oneapi_opportunistic_group, 62)
-__SYCL_ASPECT(ext_oneapi_tangle_group, 63)
-__SYCL_ASPECT(ext_intel_matrix, 64)
-__SYCL_ASPECT(ext_oneapi_is_composite, 65)
-__SYCL_ASPECT(ext_oneapi_is_component, 66)
+__SYCL_ASPECT(ext_intel_esimd, 53)
+__SYCL_ASPECT(ext_oneapi_ballot_group, 54)
+__SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
+__SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
+__SYCL_ASPECT(ext_oneapi_tangle_group, 57)
+__SYCL_ASPECT(ext_intel_matrix, 58)
+__SYCL_ASPECT(ext_oneapi_is_composite, 59)
+__SYCL_ASPECT(ext_oneapi_is_component, 60)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d_usm, 61)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_1d, 62)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 63)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 64)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 65)
+__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 66)

From 6b11a394406b2bbf43645bf2f9b2047657da9ead Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 5 Mar 2024 18:53:41 +0000
Subject: [PATCH 16/34] Use `// REQUIRES: aspect-<...>` instead of querying the
 device

---
 .../bindless_images/sampled_fetch/fetch_1D_USM.cpp     | 10 +---------
 .../bindless_images/sampled_fetch/fetch_2D.cpp         | 10 +---------
 .../bindless_images/sampled_fetch/fetch_2D_USM.cpp     | 10 +---------
 .../bindless_images/sampled_fetch/fetch_3D.cpp         | 10 +---------
 4 files changed, 4 insertions(+), 36 deletions(-)

diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
index 557cbc7e9f243..0644879136c29 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
@@ -1,5 +1,6 @@
 // REQUIRES: linux
 // REQUIRES: cuda
+// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_1d_usm
 
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
@@ -17,15 +18,6 @@ int main() {
   sycl::queue q(dev);
   auto ctxt = q.get_context();
 
-  // Check if device supports 1D USM sampled image fetches
-  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm)) {
-#ifdef VERBOSE_PRINT
-    std::cout << "Test skipped due to lack of device support for fetching 1D "
-                 "USM backed sampled images\n";
-#endif
-    return 0;
-  }
-
   // declare image data
   constexpr size_t width = 16;
   std::vector<float> out(width);
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
index 3af92e0c00935..0199bf9428194 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
@@ -1,5 +1,6 @@
 // REQUIRES: linux
 // REQUIRES: cuda
+// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d
 
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
@@ -18,15 +19,6 @@ int main() {
   sycl::queue q(dev);
   auto ctxt = q.get_context();
 
-  // Check if device supports 2D non-USM sampled image fetches
-  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d)) {
-#ifdef VERBOSE_PRINT
-    std::cout << "Test skipped due to lack of device support for fetching 2D "
-                 "non-USM backed sampled images\n";
-#endif
-    return 0;
-  }
-
   // declare image data
   constexpr size_t width = 5;
   constexpr size_t height = 6;
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
index 12608e3908b0a..3e5c9991d0ac5 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
@@ -1,5 +1,6 @@
 // REQUIRES: linux
 // REQUIRES: cuda
+// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d_usm
 
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
@@ -18,15 +19,6 @@ int main() {
   sycl::queue q(dev);
   auto ctxt = q.get_context();
 
-  // Check if device supports 2D USM sampled image fetches
-  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm)) {
-#ifdef VERBOSE_PRINT
-    std::cout << "Test skipped due to lack of device support for fetching 2D "
-                 "USM backed sampled images\n";
-#endif
-    return 0;
-  }
-
   // declare image data
   constexpr size_t width = 5;
   constexpr size_t height = 6;
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
index 12227c05407e4..2a08d36641b61 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
@@ -1,5 +1,6 @@
 // REQUIRES: linux
 // REQUIRES: cuda
+// REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_3d
 
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
@@ -18,15 +19,6 @@ int main() {
   sycl::queue q(dev);
   auto ctxt = q.get_context();
 
-  // Check if device supports 3D non-USM sampled image fetches
-  if (!dev.has(sycl::aspect::ext_oneapi_bindless_sampled_image_fetch_3d)) {
-#ifdef VERBOSE_PRINT
-    std::cout << "Test skipped due to lack of device support for fetching 3D "
-                 "non-USM backed sampled images\n";
-#endif
-    return 0;
-  }
-
   // declare image data
   constexpr size_t width = 4;
   constexpr size_t height = 6;

From cd917da895405cc9d8c6e9a47433db72aa475e05 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Fri, 8 Mar 2024 14:24:19 +0000
Subject: [PATCH 17/34] Add verbose-print parameter to LIT config

No source code changes are now required to print verbose test output
---
 sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp | 3 ---
 sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp     | 3 ---
 sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp | 3 ---
 sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp     | 3 ---
 sycl/test-e2e/format.py                                      | 2 +-
 sycl/test-e2e/lit.cfg.py                                     | 5 +++++
 6 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
index 0644879136c29..75824bef09928 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp
@@ -8,9 +8,6 @@
 #include <iostream>
 #include <sycl/sycl.hpp>
 
-// Uncomment to print additional test information
-// #define VERBOSE_PRINT
-
 class kernel_sampled_fetch;
 
 int main() {
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
index 0199bf9428194..6f6833748ec40 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp
@@ -8,9 +8,6 @@
 #include <iostream>
 #include <sycl/sycl.hpp>
 
-// Uncomment to print additional test information
-// #define VERBOSE_PRINT
-
 class kernel_sampled_fetch;
 
 int main() {
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
index 3e5c9991d0ac5..6b91eea0f4482 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp
@@ -8,9 +8,6 @@
 #include <iostream>
 #include <sycl/sycl.hpp>
 
-// Uncomment to print additional test information
-// #define VERBOSE_PRINT
-
 class kernel_sampled_fetch;
 
 int main() {
diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
index 2a08d36641b61..f02cb389e7a97 100644
--- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
+++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp
@@ -8,9 +8,6 @@
 #include <iostream>
 #include <sycl/sycl.hpp>
 
-// Uncomment to print additional test information
-// #define VERBOSE_PRINT
-
 class kernel_sampled_fetch;
 
 int main() {
diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py
index af44f2fece5af..b979c397cccfe 100644
--- a/sycl/test-e2e/format.py
+++ b/sycl/test-e2e/format.py
@@ -171,7 +171,7 @@ def execute(self, test, litConfig):
         # -that new tests by default would runnable there (unless they have
         # -other restrictions).
         substitutions.append(
-            ("%{build}", "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %s")
+            ("%{build}", "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %verbose_print %s")
         )
         if platform.system() == "Windows":
             substitutions.append(
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py
index 869dce39a5a13..b46e674a4ffa3 100644
--- a/sycl/test-e2e/lit.cfg.py
+++ b/sycl/test-e2e/lit.cfg.py
@@ -376,6 +376,11 @@
     )
     config.substitutions.append(("%shared_lib", "-shared"))
 
+# Check if user passed verbose-print parameter, if yes, add VERBOSE_PRINT macro
+if 'verbose-print' in lit_config.params:
+    config.substitutions.append(("%verbose_print", "-DVERBOSE_PRINT"))
+else:
+    config.substitutions.append(("%verbose_print", ""))
 
 config.substitutions.append(("%vulkan_include_dir", config.vulkan_include_dir))
 config.substitutions.append(("%vulkan_lib", config.vulkan_lib))

From 8d792d27b6febc40f9e82e5c6b8ea827536df200 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 13 Mar 2024 10:09:33 +0000
Subject: [PATCH 18/34] Update UR TAG

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 94f16519b321d..66eb57c8afd57 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,7 +57,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 803036ecc42978d4ed77401a39991c0d1d1239a8
+  # commit 30e4fd301f6c794f4a3216a79e14ccef04822d85
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -69,7 +69,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 803036ecc42978d4ed77401a39991c0d1d1239a8)
+  set(UNIFIED_RUNTIME_TAG 30e4fd301f6c794f4a3216a79e14ccef04822d85)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")

From 1657829382666ed5c064602069f13e0efa0d3738 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 4 Apr 2024 13:58:50 +0100
Subject: [PATCH 19/34] Formatting

---
 sycl/include/sycl/detail/pi.h                    |  2 +-
 sycl/include/sycl/ext/oneapi/bindless_images.hpp | 10 +++++-----
 sycl/test-e2e/format.py                          |  5 ++++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h
index 4601ac636e803..948b061b76f32 100644
--- a/sycl/include/sycl/detail/pi.h
+++ b/sycl/include/sycl/detail/pi.h
@@ -458,7 +458,7 @@ typedef enum {
   PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_SUPPORT = 0x20113,
   PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT = 0x20114,
 
-    // Bindless images sampled image fetch
+  // Bindless images sampled image fetch
   PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM = 0x20115,
   PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D = 0x20116,
   PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM = 0x20117,
diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
index 06792136c5cbc..6dd5a98f3a989 100644
--- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp
+++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp
@@ -806,7 +806,7 @@ template <typename DataT> constexpr bool is_recognized_standard_type() {
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized) 
+ *             Cuda textures require float coords (by element or normalized)
  *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
@@ -853,7 +853,7 @@ DataT fetch_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized) 
+ *             Cuda textures require float coords (by element or normalized)
  *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
@@ -882,7 +882,7 @@ DataT read_image(const unsampled_image_handle &imageHandle [[maybe_unused]],
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized) 
+ *             Cuda textures require float coords (by element or normalized)
  *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
@@ -929,7 +929,7 @@ DataT fetch_image(const sampled_image_handle &imageHandle [[maybe_unused]],
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized) 
+ *             Cuda textures require float coords (by element or normalized)
  *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
@@ -976,7 +976,7 @@ DataT sample_image(const sampled_image_handle &imageHandle [[maybe_unused]],
  *
  *  __NVPTX__: Name mangling info
  *             Cuda surfaces require integer coords (by bytes)
- *             Cuda textures require float coords (by element or normalized) 
+ *             Cuda textures require float coords (by element or normalized)
  *             for sampling, and integer coords (by bytes) for fetching
  *             The name mangling should therefore not interfere with one
  *             another
diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py
index b979c397cccfe..44a3566a16c54 100644
--- a/sycl/test-e2e/format.py
+++ b/sycl/test-e2e/format.py
@@ -171,7 +171,10 @@ def execute(self, test, litConfig):
         # -that new tests by default would runnable there (unless they have
         # -other restrictions).
         substitutions.append(
-            ("%{build}", "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %verbose_print %s")
+            (
+                "%{build}",
+                "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %verbose_print %s",
+            )
         )
         if platform.system() == "Windows":
             substitutions.append(

From f04cc34d7984402b2b958dd1797682e614fec46e Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 4 Apr 2024 14:28:48 +0100
Subject: [PATCH 20/34] Format. Use 'call' for device queries.

---
 sycl/source/detail/device_impl.cpp | 57 +++++++++++++-----------------
 sycl/test-e2e/lit.cfg.py           |  2 +-
 2 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp
index e6b5bf866a931..a1d318889e3c3 100644
--- a/sycl/source/detail/device_impl.cpp
+++ b/sycl/source/detail/device_impl.cpp
@@ -584,54 +584,45 @@ bool device_impl::has(aspect Aspect) const {
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: {
     pi_bool support = PI_FALSE;
-    bool call_successful =
-        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-            MDevice,
-            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM,
-            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
-    return call_successful && support;
+    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
+        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM,
+        sizeof(pi_bool), &support, nullptr);
+    return support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: {
     pi_bool support = PI_FALSE;
-    bool call_successful =
-        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D,
-            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
-    return call_successful && support;
+    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
+        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D,
+        sizeof(pi_bool), &support, nullptr);
+    return support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: {
     pi_bool support = PI_FALSE;
-    bool call_successful =
-        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-            MDevice,
-            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM,
-            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
-    return call_successful && support;
+    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
+        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM,
+        sizeof(pi_bool), &support, nullptr);
+    return support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: {
     pi_bool support = PI_FALSE;
-    bool call_successful =
-        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D,
-            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
-    return call_successful && support;
+    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
+        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D,
+        sizeof(pi_bool), &support, nullptr);
+    return support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm: {
     pi_bool support = PI_FALSE;
-    bool call_successful =
-        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-            MDevice,
-            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM,
-            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
-    return call_successful && support;
+    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
+        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM,
+        sizeof(pi_bool), &support, nullptr);
+    return support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: {
     pi_bool support = PI_FALSE;
-    bool call_successful =
-        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D,
-            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
-    return call_successful && support;
+    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
+        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D,
+        sizeof(pi_bool), &support, nullptr);
+    return support;
   }
   case aspect::ext_intel_esimd: {
     pi_bool support = PI_FALSE;
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py
index ded4628d61d44..b734e0fcd0acf 100644
--- a/sycl/test-e2e/lit.cfg.py
+++ b/sycl/test-e2e/lit.cfg.py
@@ -378,7 +378,7 @@
     config.substitutions.append(("%shared_lib", "-shared"))
 
 # Check if user passed verbose-print parameter, if yes, add VERBOSE_PRINT macro
-if 'verbose-print' in lit_config.params:
+if "verbose-print" in lit_config.params:
     config.substitutions.append(("%verbose_print", "-DVERBOSE_PRINT"))
 else:
     config.substitutions.append(("%verbose_print", ""))

From 17fa6db82dd19422049129e1e5bd6aa68d86a78c Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 9 Apr 2024 11:04:01 +0100
Subject: [PATCH 21/34] Update UR repo/tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 46 ++++++++++-----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 610e46e80c203..44f066aa1f03e 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -91,14 +91,29 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
       CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
   endfunction()
 
-  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59
-  # Merge: b9153547 ec773e6c
-  # Author: aarongreig <aaron.greig@codeplay.com>
-  # Date:   Fri Apr 5 14:26:59 2024 +0100
-  #     Merge pull request #1486 from nrspruit/fix_memfree_report
-  #     [L0] Fix DeviceInfo global mem free to report unsupported given MemCount==0
-  set(UNIFIED_RUNTIME_TAG 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59)
+  # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
+  # # commit 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59
+  # # Merge: b9153547 ec773e6c
+  # # Author: aarongreig <aaron.greig@codeplay.com>
+  # # Date:   Fri Apr 5 14:26:59 2024 +0100
+  # #     Merge pull request #1486 from nrspruit/fix_memfree_report
+  # #     [L0] Fix DeviceInfo global mem free to report unsupported given MemCount==0
+  # set(UNIFIED_RUNTIME_TAG 065bf2dd97b58a4ceeb2fb83eed1df9319e61c59)
+
+  set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
+  # commit 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c
+  # Author: Przemek Malon <przemek.malon@codeplay.com>
+  # Date:   Wed Nov 29 11:25:34 2023 +0000
+  #   [Bindless][Exp] Add device queries for sampled image fetch  
+  #     Added the following queries for device capabilities of fetching sampled
+  #     images:
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
+  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
+  set(UNIFIED_RUNTIME_TAG 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c)
 
   fetch_adapter_source(level_zero
     ${UNIFIED_RUNTIME_REPO}
@@ -130,21 +145,6 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
     ${UNIFIED_RUNTIME_TAG}
   )
 
-  set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c
-  # Author: Przemek Malon <przemek.malon@codeplay.com>
-  # Date:   Wed Nov 29 11:25:34 2023 +0000
-  #   [Bindless][Exp] Add device queries for sampled image fetch  
-  #     Added the following queries for device capabilities of fetching sampled
-  #     images:
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c)
-
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
   endif()

From 54d18128f2a15582b382f43d40403a71a8903219 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 9 Apr 2024 11:48:55 +0100
Subject: [PATCH 22/34] Undo removal of comment

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 44f066aa1f03e..3ff10a7dd58b1 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -56,6 +56,9 @@ endif()
 if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
+  # The fetch_adapter_source function can be used to perform a separate content
+  # fetch for a UR adapter, this allows development of adapters to be decoupled
+  # from each other.
   #
   # A separate content fetch will not be performed if:
   # * The adapter name is not present in the SYCL_ENABLE_PLUGINS variable.

From ff907c2a9712e60dafe349a251acfaed5186ffdb Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 9 Apr 2024 11:54:19 +0100
Subject: [PATCH 23/34] Revert aspect queries to call_nocheck

---
 sycl/source/detail/device_impl.cpp | 57 +++++++++++++++++-------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp
index a1d318889e3c3..e6b5bf866a931 100644
--- a/sycl/source/detail/device_impl.cpp
+++ b/sycl/source/detail/device_impl.cpp
@@ -584,45 +584,54 @@ bool device_impl::has(aspect Aspect) const {
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: {
     pi_bool support = PI_FALSE;
-    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
-        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM,
-        sizeof(pi_bool), &support, nullptr);
-    return support;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice,
+            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: {
     pi_bool support = PI_FALSE;
-    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
-        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D,
-        sizeof(pi_bool), &support, nullptr);
-    return support;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: {
     pi_bool support = PI_FALSE;
-    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
-        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM,
-        sizeof(pi_bool), &support, nullptr);
-    return support;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice,
+            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: {
     pi_bool support = PI_FALSE;
-    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
-        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D,
-        sizeof(pi_bool), &support, nullptr);
-    return support;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm: {
     pi_bool support = PI_FALSE;
-    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
-        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM,
-        sizeof(pi_bool), &support, nullptr);
-    return support;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice,
+            PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
   }
   case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: {
     pi_bool support = PI_FALSE;
-    getPlugin()->call<detail::PiApiKind::piDeviceGetInfo>(
-        MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D,
-        sizeof(pi_bool), &support, nullptr);
-    return support;
+    bool call_successful =
+        getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
+            MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D,
+            sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
+    return call_successful && support;
   }
   case aspect::ext_intel_esimd: {
     pi_bool support = PI_FALSE;

From d80630b7d5f99e8f9962a60c11608cd8658f49ff Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 9 Apr 2024 19:04:19 +0100
Subject: [PATCH 24/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 8159bdb656bfd..0a059746d4c3a 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c
+  # commit d05b206d62329b67a36708127afb052c4f3f273e
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 4dd5604c3d7a7b2b606c97ba88cffb6363eebe2c)
+  set(UNIFIED_RUNTIME_TAG d05b206d62329b67a36708127afb052c4f3f273e)
 
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0

From 773b49cf36cb7ab5ae882e7d1e39d54a0003f96b Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 10:38:25 +0100
Subject: [PATCH 25/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 0a059746d4c3a..7aa8bc5b04bfc 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit d05b206d62329b67a36708127afb052c4f3f273e
+  # commit 4202d207359b14d08e821d28d36608a5605f1ace
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG d05b206d62329b67a36708127afb052c4f3f273e)
+  set(UNIFIED_RUNTIME_TAG 4202d207359b14d08e821d28d36608a5605f1ace)
 
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0

From 0fc3629e5334687685b413c746510b38f84f3736 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 11:17:14 +0100
Subject: [PATCH 26/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 7aa8bc5b04bfc..423a5a03b3280 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 4202d207359b14d08e821d28d36608a5605f1ace
+  # commit 916e458b7f45c147061ef3049c78c649a12a2d23
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 4202d207359b14d08e821d28d36608a5605f1ace)
+  set(UNIFIED_RUNTIME_TAG 916e458b7f45c147061ef3049c78c649a12a2d23)
 
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0

From a9cb08a51c13c255216cca86f6dd49e1a0eaf0db Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 11:23:39 +0100
Subject: [PATCH 27/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 423a5a03b3280..b9af8dfa6dd38 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 916e458b7f45c147061ef3049c78c649a12a2d23
+  # commit 8fe880bf49d1e428421c3913e6f23cbe367755c0
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 916e458b7f45c147061ef3049c78c649a12a2d23)
+  set(UNIFIED_RUNTIME_TAG 8fe880bf49d1e428421c3913e6f23cbe367755c0)
 
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0

From 711109f7bffa359d7bcd2a12ebdb0e1c9e129e69 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 12:03:02 +0100
Subject: [PATCH 28/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index b9af8dfa6dd38..13b70402c7109 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 8fe880bf49d1e428421c3913e6f23cbe367755c0
+  # commit 60b8afe176a8ab6efd02479120ac5d9b3f460b0b
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 8fe880bf49d1e428421c3913e6f23cbe367755c0)
+  set(UNIFIED_RUNTIME_TAG 60b8afe176a8ab6efd02479120ac5d9b3f460b0b)
 
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0

From e59df310e88554d556d346aa6dd5970f468c5789 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 14:37:03 +0100
Subject: [PATCH 29/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 13b70402c7109..973a7755e5fe7 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 60b8afe176a8ab6efd02479120ac5d9b3f460b0b
+  # commit 69e70cb716a162b60c17ff677850c5e43cb3523e
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 60b8afe176a8ab6efd02479120ac5d9b3f460b0b)
+  set(UNIFIED_RUNTIME_TAG 69e70cb716a162b60c17ff677850c5e43cb3523e)
 
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0

From 94a43048465e6fb61a1abab900c7a0570bd7ae4f Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 16:48:50 +0100
Subject: [PATCH 30/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 69980c6d661e9..f3f2cfd2e1a9d 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 69e70cb716a162b60c17ff677850c5e43cb3523e
+  # commit 0ead0ca13cb59c3273ab3add872f55d52c900c97
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 69e70cb716a162b60c17ff677850c5e43cb3523e)
+  set(UNIFIED_RUNTIME_TAG 0ead0ca13cb59c3273ab3add872f55d52c900c97)
 
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit a7c202b49aff130f60da0c03916d08bb22b91aa0

From d559f35ed863203441f3d78ad093cb508d59c9c9 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 17:56:06 +0100
Subject: [PATCH 31/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 8559852b07389..2c3f396b87315 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 0ead0ca13cb59c3273ab3add872f55d52c900c97
+  # commit 323fd84406375735f8bb0de091accf63c1ba8c1e
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 0ead0ca13cb59c3273ab3add872f55d52c900c97)
+  set(UNIFIED_RUNTIME_TAG 323fd84406375735f8bb0de091accf63c1ba8c1e)
   
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit e00a764f8dafd1319b636aa99c15601ec0d4d7fc

From 1f45ea1b915a9a639782457e0a0a04cfe475f9f5 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Wed, 10 Apr 2024 18:34:16 +0100
Subject: [PATCH 32/34] Update UR tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 8389686a25193..e8ea88da6f9d1 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -95,7 +95,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 323fd84406375735f8bb0de091accf63c1ba8c1e
+  # commit 21f023f386c3352d11ceb16959f8b54a2be34757
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #   [Bindless][Exp] Add device queries for sampled image fetch  
@@ -107,7 +107,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
-  set(UNIFIED_RUNTIME_TAG 323fd84406375735f8bb0de091accf63c1ba8c1e)
+  set(UNIFIED_RUNTIME_TAG 21f023f386c3352d11ceb16959f8b54a2be34757)
   
   # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
   # # commit e00a764f8dafd1319b636aa99c15601ec0d4d7fc

From cb9cd5f77c8367fdf19775e2c7866eb140aa757e Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Tue, 30 Apr 2024 13:27:50 +0100
Subject: [PATCH 33/34] Update UR Tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 0be29ca0f614b..35a19806c5361 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -104,7 +104,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   # set(UNIFIED_RUNTIME_TAG 717791bfc636a1cd69f95f09fea15fcd70a9fb23)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 8f62b5add090372214a0f872fa6630d3bdaff14c
+  # commit 56142ad69cdc34c4e090a852fd3c599446143ba2
   # Author: Przemek Malon <przemek.malon@codeplay.com>
   # Date:   Wed Nov 29 11:25:34 2023 +0000
   #    [Bindless][Exp] Add device queries for sampled image fetch
@@ -118,7 +118,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
   #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP  
-  set(UNIFIED_RUNTIME_TAG 8f62b5add090372214a0f872fa6630d3bdaff14c)
+  set(UNIFIED_RUNTIME_TAG 56142ad69cdc34c4e090a852fd3c599446143ba2)
 
   fetch_adapter_source(level_zero
     ${UNIFIED_RUNTIME_REPO}

From 0e55b21b2dd954cd0c371fb410f120ac5c9a8555 Mon Sep 17 00:00:00 2001
From: Przemek Malon <przemek.malon@codeplay.com>
Date: Thu, 2 May 2024 11:23:11 +0100
Subject: [PATCH 34/34] Update UR Tag

---
 sycl/plugins/unified_runtime/CMakeLists.txt | 32 ++++++---------------
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 35a19806c5361..19ae36bdc46a4 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -94,31 +94,15 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
       CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
   endfunction()
 
-  # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # # commit 717791bfc636a1cd69f95f09fea15fcd70a9fb23
-  # # Merge: 601062ba f8c4facd
-  # # Author: aarongreig <aaron.greig@codeplay.com>
-  # # Date:   Fri Apr 19 10:35:14 2024 +0100
-  # #    Merge pull request #1517 from nrspruit/fix_l0_coverity_sync
-  # #    [L0] Store LastCommandEvent before unlock during queue sync
-  # set(UNIFIED_RUNTIME_TAG 717791bfc636a1cd69f95f09fea15fcd70a9fb23)
-
-  set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
-  # commit 56142ad69cdc34c4e090a852fd3c599446143ba2
-  # Author: Przemek Malon <przemek.malon@codeplay.com>
-  # Date:   Wed Nov 29 11:25:34 2023 +0000
+  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
+  # commit ebf873fb5996c9ddca32bbb7c9330d3ffe15473c
+  # Merge: 633ec408 8f375039
+  # Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
+  # Date:   Thu May 2 10:56:55 2024 +0100
+  #    Merge pull request #1535 from przemektmalon/przemek/sampled-image-fetch
+  #    
   #    [Bindless][Exp] Add device queries for sampled image fetch
-  #
-  #    Added the following queries for device capabilities of fetching
-  #    sampled images:
-  #
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
-  #     - DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP  
-  set(UNIFIED_RUNTIME_TAG 56142ad69cdc34c4e090a852fd3c599446143ba2)
+  set(UNIFIED_RUNTIME_TAG ebf873fb5996c9ddca32bbb7c9330d3ffe15473c)
 
   fetch_adapter_source(level_zero
     ${UNIFIED_RUNTIME_REPO}