Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/sycl' into georgi/sycl-cuda-ou…
Browse files Browse the repository at this point in the history
…t-of-resources-registers-error
  • Loading branch information
GeorgeWeb committed Feb 6, 2024
2 parents 4d6eb99 + bd86f04 commit 7e258e9
Show file tree
Hide file tree
Showing 21 changed files with 1,105 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sycl-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
- name: OCL CPU (Intel/GEN12)
runner: '["Linux", "gen12"]'
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_options: -u 1001
image_options: -u 1001 --privileged --cap-add SYS_ADMIN
target_devices: opencl:cpu

- name: OCL CPU (Intel/Arc)
Expand Down
8 changes: 6 additions & 2 deletions sycl/include/sycl/detail/builtins/relational_functions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,12 @@ BUILTIN_REL(ONE_ARG, signbit, __spirv_SignBitSet)
#undef BUILTIN_REL

#ifdef __SYCL_DEVICE_ONLY__
DEVICE_IMPL_TEMPLATE(THREE_ARGS, bitselect, builtin_enable_generic_t,
__spirv_ocl_bitselect)
DEVICE_IMPL_TEMPLATE(
THREE_ARGS, bitselect, builtin_enable_generic_t, [](auto... xs) {
using ret_ty = detail::builtin_enable_generic_t<THREE_ARGS_TEMPLATE_TYPE>;
using detail::builtins::convert_result;
return convert_result<ret_ty>(__spirv_ocl_bitselect(xs...));
})
#else
HOST_IMPL_TEMPLATE(THREE_ARGS, bitselect, builtin_enable_generic_t, rel,
default_ret_type)
Expand Down
37 changes: 21 additions & 16 deletions sycl/include/sycl/ext/intel/esimd/detail/simd_obj_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,23 @@ constexpr vector_type_t<T, N> make_vector(const T (&&Arr)[N]) {
}

template <class T, int N, size_t... Is>
constexpr vector_type_t<T, N> make_vector_impl(T Base, T Stride,
std::index_sequence<Is...>) {
return vector_type_t<T, N>{(T)(Base + ((T)Is) * Stride)...};
constexpr auto make_vector_impl(T Base, T Stride, std::index_sequence<Is...>) {
if constexpr (std::is_integral_v<T> && N <= 3) {
// This sequence is a bit more efficient for integral types and N <= 3.
return vector_type_t<T, N>{(T)(Base + ((T)Is) * Stride)...};
} else {
using CppT = typename element_type_traits<T>::EnclosingCppT;
CppT BaseCpp = Base;
CppT StrideCpp = Stride;
vector_type_t<CppT, N> VBase = BaseCpp;
vector_type_t<CppT, N> VStride = StrideCpp;
vector_type_t<CppT, N> VStrideCoef{(CppT)(Is)...};
vector_type_t<CppT, N> Result{VBase + VStride * VStrideCoef};
return wrapper_type_converter<T>::template to_vector<N>(Result);
}
}

template <class T, int N>
constexpr vector_type_t<T, N> make_vector(T Base, T Stride) {
template <class T, int N> constexpr auto make_vector(T Base, T Stride) {
return make_vector_impl<T, N>(Base, Stride, std::make_index_sequence<N>{});
}

Expand Down Expand Up @@ -265,18 +275,13 @@ class [[__sycl_detail__::__uses_aspects__(
/// are initialized with the arithmetic progression defined by the arguments.
/// For example, <code>simd<int, 4> x(1, 3)</code> will initialize x to the
/// <code>{1, 4, 7, 10}</code> sequence.
/// @param Val The start of the progression.
/// If Ty is a floating-point type and \p Base or \p Step is +/-inf or nan,
/// then this constructor has undefined behavior.
/// @param Base The start of the progression.
/// @param Step The step of the progression.
simd_obj_impl(Ty Val, Ty Step) noexcept {
__esimd_dbg_print(simd_obj_impl(Ty Val, Ty Step));
if constexpr (is_wrapper_elem_type_v<Ty> || !std::is_integral_v<Ty>) {
for (int i = 0; i < N; ++i) {
M_data[i] = bitcast_to_raw_type(Val);
Val = binary_op<BinOp::add, Ty>(Val, Step);
}
} else {
M_data = make_vector<Ty, N>(Val, Step);
}
simd_obj_impl(Ty Base, Ty Step) noexcept {
__esimd_dbg_print(simd_obj_impl(Ty Base, Ty Step));
M_data = make_vector<Ty, N>(Base, Step);
}

/// Broadcast constructor. Given value is type-converted to the
Expand Down
317 changes: 314 additions & 3 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions sycl/source/detail/pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,14 @@ getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize) {
return PI_DEVICE_BINARY_TYPE_NATIVE;
}

if (MatchMagicNumber(std::array{'!', '<', 'a', 'r', 'c', 'h', '>', '\n'}))
// "ar" format is used to pack binaries for multiple devices, e.g. via
//
// -Xsycl-target-backend=spir64_gen "-device acm-g10,acm-g11"
//
// option.
return PI_DEVICE_BINARY_TYPE_NATIVE;

return PI_DEVICE_BINARY_TYPE_NONE;
}

Expand Down
58 changes: 58 additions & 0 deletions sycl/test-e2e/Basic/built-ins/marray_relational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,64 @@ int main() {
TEST2(sycl::any, int, EXPECTED(bool, false), 3, ma7);
TEST(sycl::bitselect, float, EXPECTED(float, 1.0, 1.0), 2, ma8, ma9, ma10);
TEST(sycl::select, float, EXPECTED(float, 1.0, 2.0, 8.0), 3, ma5, ma6, c);
{
// Extra tests for select/bitselect due to special handling required for
// integer return types.

auto Test = [&](auto F, auto Expected, auto... Args) {
std::tuple ArgsTuple{Args...};
auto Result = std::apply(F, ArgsTuple);
static_assert(std::is_same_v<decltype(Expected), decltype(Result)>);

auto Equal = [](auto x, auto y) {
for (size_t i = 0; i < x.size(); ++i)
if (x[i] != y[i])
return false;

return true;
};

assert(Equal(Result, Expected));

sycl::buffer<bool, 1> ResultBuf{1};
deviceQueue.submit([&](sycl::handler &cgh) {
sycl::accessor Result{ResultBuf, cgh};
cgh.single_task([=]() {
auto R = std::apply(F, ArgsTuple);
static_assert(std::is_same_v<decltype(Expected), decltype(R)>);
Result[0] = Equal(R, Expected);
});
});
assert(sycl::host_accessor{ResultBuf}[0]);
};

sycl::marray<char, 2> a{0b1100, 0b0011};
sycl::marray<char, 2> b{0b0011, 0b1100};
sycl::marray<char, 2> c{0b1010, 0b1010};
sycl::marray<char, 2> r{0b0110, 0b1001};

auto BitSelect = [](auto... xs) { return sycl::bitselect(xs...); };
Test(BitSelect, r, a, b, c);
// Input values/results above are positive, so use the same values for
// signed/unsigned char tests.
[&](auto... xs) {
Test(BitSelect, sycl::marray<signed char, 2>{xs}...);
}(r, a, b, c);
[&](auto... xs) {
Test(BitSelect, sycl::marray<unsigned char, 2>{xs}...);
}(r, a, b, c);

auto Select = [](auto... xs) { return sycl::select(xs...); };
sycl::marray<bool, 2> c2{false, true};
sycl::marray<char, 2> r2{a[0], b[1]};
Test(Select, r2, a, b, c2);
[&](auto... xs) {
Test(Select, sycl::marray<signed char, 2>{xs}..., c2);
}(r2, a, b);
[&](auto... xs) {
Test(Select, sycl::marray<unsigned char, 2>{xs}..., c2);
}(r2, a, b);
}

return 0;
}
59 changes: 59 additions & 0 deletions sycl/test-e2e/Basic/built-ins/scalar_relational.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t_preview.out %}
// RUN: %if preview-breaking-changes-supported %{ %{run} %t_preview.out%}

#include <sycl/sycl.hpp>

template <typename... Ts, typename FuncTy> void TestTypes(FuncTy F) {
(F(Ts{}), ...);
}

int main() {
sycl::queue q;

auto Test = [&](auto F, auto Expected, auto... Args) {
#if defined(__GNUC__) || defined(__clang__)
std::cout << __PRETTY_FUNCTION__ << std::endl;
#endif
std::tuple ArgsTuple{Args...};
auto Result = std::apply(F, ArgsTuple);
static_assert(std::is_same_v<decltype(Expected), decltype(Result)>);
assert(Expected == Result);

sycl::buffer<bool, 1> ResultBuf{1};
q.submit([&](sycl::handler &cgh) {
sycl::accessor Result{ResultBuf, cgh};
cgh.single_task([=]() {
auto R = std::apply(F, ArgsTuple);
static_assert(std::is_same_v<decltype(Expected), decltype(R)>);
Result[0] = Expected == R;
});
});
assert(sycl::host_accessor{ResultBuf}[0]);
};

auto TestBitSelect = [&](auto type_val) {
using T = decltype(type_val);
auto BitSelect = [](auto... xs) { return sycl::bitselect(xs...); };

static_assert(std::is_integral_v<T>,
"Only integer test is implemented here!");
Test(BitSelect, T{0b0110}, T{0b1100}, T{0b0011}, T{0b1010});
};

TestTypes<signed char, unsigned char, char, long, long long, unsigned long,
unsigned long long>(TestBitSelect);

auto TestSelect = [&](auto type_val) {
using T = decltype(type_val);
auto Select = [](auto... xs) { return sycl::select(xs...); };

Test(Select, T{0}, T{1}, T{0}, true);
Test(Select, T{1}, T{1}, T{0}, false);
};

TestTypes<signed char, unsigned char, char>(TestSelect);

return 0;
}
64 changes: 64 additions & 0 deletions sycl/test-e2e/Basic/built-ins/vec_relational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,70 @@ int main() {
TEST2(sycl::any, int, EXPECTED(int32_t, 0), 3, va7);
TEST(sycl::bitselect, float, EXPECTED(float, 1.0, 1.0), 2, va8, va9, va10);
TEST(sycl::select, float, EXPECTED(float, 1.0, 2.0, 8.0), 3, va5, va6, c1);
{
// Extra tests for select/bitselect due to special handling required for
// integer return types.

auto Test = [&](auto F, auto Expected, auto... Args) {
std::tuple ArgsTuple{Args...};
auto Result = std::apply(F, ArgsTuple);
static_assert(std::is_same_v<decltype(Expected), decltype(Result)>);

// Note: operator==(vec, vec) return vec.
auto Equal = [](auto x, auto y) {
for (size_t i = 0; i < x.size(); ++i)
if (x[i] != y[i])
return false;

return true;
};

assert(Equal(Result, Expected));

sycl::buffer<bool, 1> ResultBuf{1};
deviceQueue.submit([&](sycl::handler &cgh) {
sycl::accessor Result{ResultBuf, cgh};
cgh.single_task([=]() {
auto R = std::apply(F, ArgsTuple);
static_assert(std::is_same_v<decltype(Expected), decltype(R)>);
Result[0] = Equal(R, Expected);
});
});
assert(sycl::host_accessor{ResultBuf}[0]);
};

// Note that only int8_t/uint8_t are supported by the bitselect/select
// builtins and not all three char data types. Also, use positive numbers
// for the values below so that we could use the same for both
// signed/unsigned tests.
sycl::vec<uint8_t, 2> a{0b1100, 0b0011};
sycl::vec<uint8_t, 2> b{0b0011, 0b1100};
sycl::vec<uint8_t, 2> c{0b1010, 0b1010};
sycl::vec<uint8_t, 2> r{0b0110, 0b1001};

auto BitSelect = [](auto... xs) { return sycl::bitselect(xs...); };
Test(BitSelect, r, a, b, c);
[&](auto... xs) {
Test(BitSelect, xs.template as<sycl::vec<int8_t, 2>>()...);
}(r, a, b, c);

auto Select = [](auto... xs) { return sycl::select(xs...); };
sycl::vec<uint8_t, 2> c2{0x7F, 0xFF};
sycl::vec<uint8_t, 2> r2{a[0], b[1]};

Test(Select, r2, a, b, c2);
[&](auto... xs) {
Test(Select, xs.template as<sycl::vec<int8_t, 2>>()..., c2);
}(r2, a, b);

// Assume that MSB of a signed data type is the leftmost bit (signbit).
auto c3 = c2.template as<sycl::vec<int8_t, 2>>();

Test(Select, r2, a, b, c3);
[&](auto... xs) {
Test(Select, xs.template as<sycl::vec<int8_t, 2>>()..., c3);
}(r2, a, b);
}

#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
TEST(sycl::isequal, int32_t, EXPECTED(int32_t, 1, 1), 2, va11.swizzle<0, 1>(),
Expand Down
16 changes: 3 additions & 13 deletions sycl/test-e2e/ESIMD/api/functional/ctors/ctor_fill.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,18 +246,8 @@ class run_test {
});
queue.wait_and_throw();

// Verify the base value was passed as-is
if (!are_bitwise_equal(result[0], base_value)) {
passed = false;
log::fail(TestDescriptionT(data_type, BaseVal, Step),
"Unexpected value at index 0, retrieved: ", result[0],
", expected: ", base_value);
}

// Verify the step value works as expected being passed to the fill
// constructor.
DataT expected_value = base_value;
for (size_t i = 1; i < result.size(); ++i) {
// Verify the the fill constructor.
for (size_t i = 0; i < result.size(); ++i) {
if constexpr (BaseVal == init_val::nan || Step == init_val::nan) {

if (!std::isnan(result[i])) {
Expand All @@ -268,7 +258,7 @@ class run_test {
}
} else {

expected_value += step_value;
DataT expected_value = base_value + (DataT)i * step_value;
if (!are_bitwise_equal(result[i], expected_value)) {
passed = false;
log::fail(TestDescriptionT(data_type, BaseVal, Step),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
// The test verifies that simd fill constructor has no precision differences.
// The test do the following actions:
// - call simd with predefined base and step values
// - bitwise comparing that output[0] value is equal to base value and
// output[i] is equal to output[i -1] + step_value
// - bitwise comparing that output[i] is equal to base + i * step_value.

#include "ctor_fill.hpp"

Expand Down
19 changes: 14 additions & 5 deletions sycl/test-e2e/ESIMD/api/functional/ctors/ctor_fill_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// TODO: remove fno-fast-math option once the issue is investigated and the test
// is fixed.
// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
// RUN: %{build} -fsycl-device-code-split=per_kernel %{mathflags} -o %t.out
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out
// RUN: %{run} %t.out
//
// Test for simd fill constructor for core types.
Expand Down Expand Up @@ -112,10 +109,22 @@ int main(int, char **) {
}
{
const auto types = get_tested_types<tested_types::fp>();
{
const auto base_values =
ctors::get_init_values_pack<init_val::negative>();
const auto step_values =
ctors::get_init_values_pack<init_val::positive>();
passed &= for_all_combinations<ctors::run_test>(
types, sizes, contexts, base_values, step_values, queue);
}
// The test cases below have never been guaranteed to work some certain
// way with base and step values set to inf or non. They may or may not
// work as expected by the checks in this test.
{
const auto base_values =
ctors::get_init_values_pack<init_val::neg_inf>();
const auto step_values = ctors::get_init_values_pack<init_val::max>();
const auto step_values =
ctors::get_init_values_pack<init_val::positive>();
passed &= for_all_combinations<ctors::run_test>(
types, sizes, contexts, base_values, step_values, queue);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// TODO: remove fno-fast-math option once the issue is investigated and the test
// is fixed.
// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
// RUN: %{build} -fsycl-device-code-split=per_kernel %{mathflags} -o %t.out
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out
// RUN: %{run} %t.out
//
// Test for simd fill constructor for extra fp types.
Expand Down
6 changes: 3 additions & 3 deletions sycl/test-e2e/ESIMD/lsc/Inputs/lsc_slm_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ bool test(queue Q, uint32_t PMask = ~0) {
uint32_t LID = I % (LocalRange * VL * NChannels);
uint32_t GID = I / VL;
bool Pred = (GID & 0x1) == 0;
T ExpectedVal = GroupId * 1000000 + LID;
Tuint ExpectedVal = GroupId * 1000000 + LID;
if (TestMergeOperand && !Pred)
ExpectedVal = GID + (I % VL);
ExpectedVal = sycl::bit_cast<Tuint>((T)(GID + (I % VL)));

if (Out[I] != ExpectedVal && NErrors++ < 32) {
if (sycl::bit_cast<Tuint>(Out[I]) != ExpectedVal && NErrors++ < 32) {
std::cout << "Error: " << I << ": Value = " << Out[I]
<< ", Expected value = " << ExpectedVal << std::endl;
}
Expand Down
Loading

0 comments on commit 7e258e9

Please sign in to comment.