From 73cf85dc17dc95215fbd726ba9e314fda832b1a8 Mon Sep 17 00:00:00 2001 From: Ouadie EL FAROUKI Date: Fri, 14 Jun 2024 07:07:25 +0100 Subject: [PATCH] [SYCL][COMPAT] Add math extend_vcompare[2/4] to SYCLCompat (#14079) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds math `extend_vcompare[2/4] `operators (4 in total) along with unit-tests for signed and unsigned int32 cases. Also, Unit-tests from previous `extend_v*4` #14078 and `extend_v*2` #13953 are moved to two different files. --------- Co-authored-by: Alberto Cabrera PĂ©rez Co-authored-by: Joe Todd Co-authored-by: Yihan Wang --- sycl/doc/syclcompat/README.md | 67 +++ sycl/include/syclcompat/math.hpp | 66 +++ .../syclcompat/math/math_extend_v_2.cpp | 464 ++++++++++++++++++ ...{math_extend_v.cpp => math_extend_v_4.cpp} | 416 +++------------- 4 files changed, 678 insertions(+), 335 deletions(-) create mode 100644 sycl/test-e2e/syclcompat/math/math_extend_v_2.cpp rename sycl/test-e2e/syclcompat/math/{math_extend_v.cpp => math_extend_v_4.cpp} (56%) diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index 1b7f0ab003bf2..1fd47cde4c517 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -2416,6 +2416,73 @@ template inline constexpr RetT extend_vavrg4_sat(AT a, BT b, RetT c); ``` +Vectorized comparison APIs also provided in the math header behave similarly +and support a `std` comparison operator parameter which can be `greater`, +`less`, `greater_equal`, `less_equal`, `equal_to` or `not_equal_to`. These APIs +cover both the 2-elements *(16-bits each)* and 4-elements *(8-bits each)* +variants, as well as an additional `_add` variant that computes the sum of the +2/4 output elements. + +```cpp +/// Extend \p a and \p b to 33 bit and vectorized compare input values using +/// specified comparison \p cmp . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values. +template +inline constexpr unsigned extend_vcompare2(AT a, BT b, BinaryOperation cmp); + +/// Extend Inputs to 33 bit, and vectorized compare input values using specified +/// comparison \p cmp , then add the result with \p c . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] c The third value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values, and add the +/// result with \p c . +template +inline constexpr unsigned extend_vcompare2_add(AT a, BT b, unsigned c, + BinaryOperation cmp); + +/// Extend \p a and \p b to 33 bit and vectorized compare input values using +/// specified comparison \p cmp . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values. +template +inline constexpr unsigned extend_vcompare4(AT a, BT b, BinaryOperation cmp); + +/// Extend Inputs to 33 bit, and vectorized compare input values using specified +/// comparison \p cmp , then add the result with \p c . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] c The third value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values, and add the +/// result with \p c . +template +inline constexpr unsigned extend_vcompare4_add(AT a, BT b, unsigned c, + BinaryOperation cmp); +``` + The math header file provides APIs for bit-field insertion (`bfi_safe`) and bit-field extraction (`bfe_safe`). These are bounds-checked variants of underlying `detail` APIs (`detail::bfi`, `detail::bfe`) which, in future diff --git a/sycl/include/syclcompat/math.hpp b/sycl/include/syclcompat/math.hpp index 91990b6585fc8..2f9ac8c0f1b4c 100644 --- a/sycl/include/syclcompat/math.hpp +++ b/sycl/include/syclcompat/math.hpp @@ -1856,6 +1856,39 @@ inline constexpr RetT extend_vavrg2_sat(AT a, BT b, RetT c) { return detail::extend_vbinary2(a, b, c, detail::average()); } +/// Extend \p a and \p b to 33 bit and vectorized compare input values using +/// specified comparison \p cmp . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values. +template +inline constexpr unsigned extend_vcompare2(AT a, BT b, BinaryOperation cmp) { + return detail::extend_vbinary2(a, b, 0, cmp); +} + +/// Extend Inputs to 33 bit, and vectorized compare input values using specified +/// comparison \p cmp , then add the result with \p c . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] c The third value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values, and add the +/// result with \p c . +template +inline constexpr unsigned extend_vcompare2_add(AT a, BT b, unsigned c, + BinaryOperation cmp) { + return detail::extend_vbinary2(a, b, c, cmp); +} + /// Compute vectorized addition of \p a and \p b, with each value treated as a /// 4 elements vector type and extend each element to 9 bit. /// \tparam [in] RetT The type of the return value, can only be 32 bit integer @@ -2121,4 +2154,37 @@ inline constexpr RetT extend_vavrg4_sat(AT a, BT b, RetT c) { return detail::extend_vbinary4(a, b, c, detail::average()); } +/// Extend \p a and \p b to 33 bit and vectorized compare input values using +/// specified comparison \p cmp . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values. +template +inline constexpr unsigned extend_vcompare4(AT a, BT b, BinaryOperation cmp) { + return detail::extend_vbinary4(a, b, 0, cmp); +} + +/// Extend Inputs to 33 bit, and vectorized compare input values using specified +/// comparison \p cmp , then add the result with \p c . +/// +/// \tparam [in] AT The type of the first value, can only be 32 bit integer +/// \tparam [in] BT The type of the second value, can only be 32 bit integer +/// \tparam [in] BinaryOperation The type of the compare operation +/// \param [in] a The first value +/// \param [in] b The second value +/// \param [in] c The third value +/// \param [in] cmp The comparsion operator +/// \returns The comparison result of the two extended values, and add the +/// result with \p c . +template +inline constexpr unsigned extend_vcompare4_add(AT a, BT b, unsigned c, + BinaryOperation cmp) { + return detail::extend_vbinary4(a, b, c, cmp); +} + } // namespace syclcompat diff --git a/sycl/test-e2e/syclcompat/math/math_extend_v_2.cpp b/sycl/test-e2e/syclcompat/math/math_extend_v_2.cpp new file mode 100644 index 0000000000000..256cedc4602f2 --- /dev/null +++ b/sycl/test-e2e/syclcompat/math/math_extend_v_2.cpp @@ -0,0 +1,464 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCLcompat API + * + * math_extend_v_2.cpp + * + * Description: + * math extend 2-vectorized helpers tests + **************************************************************************/ + +// ===------------- math_extend_vfunc_2.cpp ----------------*- C++ -*-----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// ===---------------------------------------------------------------------===// + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CHECK(S, REF) \ + { \ + auto ret = S; \ + if (ret != REF) { \ + return {#S, REF}; \ + } \ + } + +std::pair vadd2() { + CHECK(syclcompat::extend_vadd2(0x0001FFFF, 0x00010005, 0), + 0x00020004); + CHECK(syclcompat::extend_vadd2(0x7FFF7FFF, 0x00010001, 0), + 0x80008000); + CHECK(syclcompat::extend_vadd2_sat(0x7FFF7FFF, 0x00010001, 0), + 0x7FFF7FFF); + + CHECK(syclcompat::extend_vadd2(0x00010002, 0x00020003, 0), + 0x00030005); + CHECK(syclcompat::extend_vadd2(0xFFFEFFFF, 0x00030003, 0), + 0x00010002); + CHECK(syclcompat::extend_vadd2_sat((uint32_t)0xFFFEFFFF, + (uint32_t)0x00030003, 0), + 0xFFFFFFFF); + return {nullptr, 0}; +} + +std::pair vsub2() { + + CHECK(syclcompat::extend_vsub2(0x0001FFFF, 0xFFFF0001, 0), + 0x0002FFFE); + // Testing API & Saturated API with mixed types + CHECK(syclcompat::extend_vsub2((int32_t)0x7FFFFFFD, + (int32_t)0xFFFA7FFF, 0), + 0x80057FFE); + CHECK(syclcompat::extend_vsub2((uint32_t)0x7FFFFFFD, + (uint32_t)0xFFFA7FFF, 0), + 0x80057FFE); + CHECK(syclcompat::extend_vsub2((uint32_t)0x7FFFFFFD, + (int32_t)0xFFFA7FFF, 0), + 0x80057FFE); + CHECK(syclcompat::extend_vsub2((int32_t)0x7FFFFFFD, + (uint32_t)0xFFFA7FFF, 0), + 0x80057FFE); + CHECK(syclcompat::extend_vsub2_sat((int32_t)0x7FFFFFFD, + (int32_t)0xFFFA7FFF, 0), + 0x7FFF8000); + CHECK(syclcompat::extend_vsub2_sat((uint32_t)0x7FFFFFFD, + (uint32_t)0xFFFA7FFF, 0), + 0x80057FFE); + CHECK(syclcompat::extend_vsub2_sat((int32_t)0x7FFFFFFD, + (uint32_t)0xFFFA7FFF, 0), + 0x80058000); + CHECK(syclcompat::extend_vsub2_sat((uint32_t)0x7FFFFFFD, + (int32_t)0xFFFA7FFF, 0), + 0x7FFF7FFE); + + CHECK(syclcompat::extend_vsub2(0x0002000B, 0x0001000A, 0), + 0x00010001); + CHECK(syclcompat::extend_vsub2((uint32_t)0x00010001, + (uint32_t)0x0002FFFF, 0), + 0xFFFF0002); + CHECK(syclcompat::extend_vsub2((int32_t)0x00010001, + (int32_t)0x0002FFFF, 0), + 0xFFFF0002); + CHECK(syclcompat::extend_vsub2_sat((uint32_t)0x00010001, + (uint32_t)0x0002FFFF, 0), + 0x00000000); + CHECK(syclcompat::extend_vsub2_sat((int32_t)0x00010001, + (int32_t)0x0002FFFF, 0), + 0x00000002); + + return {nullptr, 0}; +} + +std::pair vadd2_add() { + + CHECK(syclcompat::extend_vadd2_add(0x00010002, 0x00030004, 1), + 0x0000000B); + CHECK(syclcompat::extend_vadd2_add(0x0001FFFF, 0x0002FFFE, -1), + 0xFFFFFFFF); + CHECK(syclcompat::extend_vadd2_add(0x00017FFF, 0x00017FFF, 1), + 0x00010001); + + CHECK(syclcompat::extend_vadd2_add(0x00010002, 0x00030004, 1), + 0x0000000B); + CHECK(syclcompat::extend_vadd2_add((uint32_t)0x0001FFFF, + (uint32_t)0x0002FFFF, 1), + 0x00020002); + CHECK(syclcompat::extend_vadd2_add(0x0001FFFF, 0x0002FFFF, 1), + 0x00000002); + + return {nullptr, 0}; +} + +std::pair vsub2_add() { + + // Testing API with mixed types + CHECK(syclcompat::extend_vsub2_add((int32_t)0x0001FFFF, + (int32_t)0xFFFF0001, 1), + 1); + CHECK(syclcompat::extend_vsub2_add((uint32_t)0x7FFFFFFD, + (uint32_t)0xFFFA7FFF, -1), + 0x00000002); + CHECK(syclcompat::extend_vsub2_add((int32_t)0x7FFFFFFD, + (int32_t)0xFFFA7FFF, -1), + 0x00000002); + CHECK(syclcompat::extend_vsub2_add((int32_t)0x7FFFFFFD, + (uint32_t)0xFFFA7FFF, -1), + 0xFFFF0002); + CHECK(syclcompat::extend_vsub2_add((uint32_t)0x7FFFFFFD, + (int32_t)0xFFFA7FFF, -1), + 0x00010002); + + CHECK(syclcompat::extend_vsub2_add(0x0002000B, 0x0001000A, 1), + 0x00000003); + CHECK(syclcompat::extend_vsub2_add(0x00010001, 0x0002FFFF, 3), + 0x00000004); + + return {nullptr, 0}; +} + +std::pair vabsdiff2() { + + CHECK(syclcompat::extend_vabsdiff2((int32_t)0xFFFF0001, + (int32_t)0x0003FFFF, 0), + 0x00040002); + CHECK(syclcompat::extend_vabsdiff2((int32_t)0x80000002, + (int32_t)0x00010001, 0), + 0x80010001); + CHECK(syclcompat::extend_vabsdiff2_sat((int32_t)0x80000002, + (int32_t)0x00010001, 0), + 0x7FFF0001); + + CHECK(syclcompat::extend_vabsdiff2(0x00010004, 0x00030002, 0), + 0x00020002); + CHECK(syclcompat::extend_vabsdiff2((uint32_t)0xFFFF0001, + (int32_t)0xFFFE0003, 0), + 0x00010002); + CHECK(syclcompat::extend_vabsdiff2_sat((uint32_t)0xFFFF0001, + (int32_t)0xFFFE0003, 0), + 0xFFFF0002); + + return {nullptr, 0}; +} + +std::pair vabsdiff2_add() { + + CHECK(syclcompat::extend_vabsdiff2_add((int32_t)0xFFFF0001, + (int32_t)0x0003FFFF, -2), + 0x00000004); + + CHECK(syclcompat::extend_vabsdiff2_add(0x000A000C, 0x000B000A, 1), + 0x00000004); + + return {nullptr, 0}; +} + +std::pair vmin2() { + + CHECK(syclcompat::extend_vmin2((int32_t)0xFFFF0002, 0x00010001, 0), + (int32_t)0xFFFF0001); + CHECK(syclcompat::extend_vmin2_sat(0x0002FFF1, 0x0001FFF2, 0), + 0x0001FFF1); + + CHECK(syclcompat::extend_vmin2(0x000A000D, 0x000B000C, 0), + 0x000A000C); + CHECK(syclcompat::extend_vmin2_sat(0x0002FFF1, 0x0001FFF2, 0), + 0x00010000); + + return {nullptr, 0}; +} + +std::pair vmax2() { + + CHECK(syclcompat::extend_vmax2((int32_t)0xFFFF0002, 0x00010001, 0), + 0x00010002); + CHECK(syclcompat::extend_vmax2_sat(0x80008000, 0x00018001, 0), + 0x7FFF7FFF); + + CHECK(syclcompat::extend_vmax2(0x000A000D, 0x000B000C, 0), + 0x000B000D); + CHECK(syclcompat::extend_vmax2_sat(0x0002FFF1, 0x0001FFF2, 0), + 0x00020000); + + return {nullptr, 0}; +} + +std::pair vmin2_vmax2_add() { + + CHECK( + syclcompat::extend_vmin2_add((int32_t)0xFFFF0002, 0x00010001, 2), + 0x00000002); + CHECK(syclcompat::extend_vmin2_add(0x000A000D, 0x000B000C, 2), + 0x00000018); + + CHECK(syclcompat::extend_vmax2_add((int32_t)0xFFFF0002, 0x00010001, + -2), + 0x00000001); + CHECK(syclcompat::extend_vmax2_add(0x000A000D, 0x000B000C, 2), + 0x0000001A); + + return {nullptr, 0}; +} + +std::pair vavrg2() { + + CHECK(syclcompat::extend_vavrg2((int32_t)0xFFFFFFF6, 0x0005FFFA, 0), + 0x0002FFF8); + CHECK(syclcompat::extend_vavrg2_sat((int32_t)0xFFFFFFF6, 0x0005FFFA, + 0), + 0x0002FFF8); + + CHECK(syclcompat::extend_vavrg2(0x00010006, 0x00030001, 0), + 0x00020004); + CHECK(syclcompat::extend_vavrg2_sat(0x00010006, 0x00030001, 0), + 0x00020004); + + return {nullptr, 0}; +} + +std::pair vavrg2_add() { + + CHECK(syclcompat::extend_vavrg2_add((int32_t)0xFFFFFFF6, 0x0005FFFA, + -2), + 0xFFFFFFF8); + + CHECK(syclcompat::extend_vavrg2_add(0x00010006, 0x00030002, 2), + 0x00000008); + + return {nullptr, 0}; +} + +std::pair vcompare2() { + + CHECK(syclcompat::extend_vcompare2(0x0002FFFF, 0x0001FFFF, std::greater<>()), + (unsigned)0x00010000); + CHECK(syclcompat::extend_vcompare2((uint32_t)0x0002FFFF, (int32_t)0x0001FFFF, + std::greater<>()), + (unsigned)0x00010001); + CHECK(syclcompat::extend_vcompare2((int32_t)0x0002FFFF, (uint32_t)0x0001FFFF, + std::greater<>()), + (unsigned)0x00010000); + + CHECK(syclcompat::extend_vcompare2(0x0002FFFF, 0x0001FFFF, std::less<>()), + (unsigned)0x00000000); + CHECK(syclcompat::extend_vcompare2(0x0002FFFF, 0x0002FFFF, + std::greater_equal<>()), + (unsigned)0x00010001); + CHECK( + syclcompat::extend_vcompare2(0x0002FFFF, 0x0001FFFF, std::less_equal<>()), + (unsigned)0x00000001); + CHECK(syclcompat::extend_vcompare2(0xFFFE0002, 0xFFFF0002, std::equal_to<>()), + (unsigned)0x00000001); + CHECK(syclcompat::extend_vcompare2(0xFFFE0002, 0xFFFF0002, + std::not_equal_to<>()), + (unsigned)0x00010000); + + return {nullptr, 0}; +} + +std::pair vcompare2_add() { + + CHECK(syclcompat::extend_vcompare2_add(0x0002FFFF, 0x0001FFFF, 1, + std::greater<>()), + (unsigned)0x00000002); + CHECK(syclcompat::extend_vcompare2_add(0x0002FFFF, 0x0001FFFF, 2, + std::less<>()), + (unsigned)0x00000002); + CHECK(syclcompat::extend_vcompare2_add(0x0002FFFF, 0x0002FFFF, 1, + std::greater_equal<>()), + (unsigned)0x00000003); + CHECK(syclcompat::extend_vcompare2_add(0x0002FFFF, 0x0001FFFF, 2, + std::less_equal<>()), + (unsigned)0x00000003); + CHECK(syclcompat::extend_vcompare2_add(0xFFFE0002, 0xFFFF0002, 0xFFFF, + std::equal_to<>()), + (unsigned)0x00010000); + CHECK(syclcompat::extend_vcompare2_add(0xFFFE0002, 0xFFFF0002, 0xFF, + std::not_equal_to<>()), + (unsigned)0x00000100); + + return {nullptr, 0}; +} + +void test(const sycl::stream &s, int *ec) { + { + auto res = vadd2(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 1; + return; + } + s << "vadd2 check passed!\n"; + } + { + auto res = vsub2(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 2; + return; + } + s << "vsub2 check passed!\n"; + } + { + auto res = vadd2_add(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 3; + return; + } + s << "vadd2_add check passed!\n"; + } + { + auto res = vsub2_add(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 4; + return; + } + s << "vsub2_add check passed!\n"; + } + { + auto res = vabsdiff2(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 5; + return; + } + s << "vabsdiff2 check passed!\n"; + } + { + auto res = vmin2(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 6; + return; + } + s << "vmin2 check passed!\n"; + } + { + auto res = vmax2(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 7; + return; + } + s << "vmax2 check passed!\n"; + } + { + auto res = vmin2_vmax2_add(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 8; + return; + } + s << "vmin2_add/vmax2_add check passed!\n"; + } + { + auto res = vavrg2(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 9; + return; + } + s << "vavrg2 check passed!\n"; + } + { + auto res = vavrg2_add(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 10; + return; + } + s << "vavrg2_add check passed!\n"; + } + { + auto res = vabsdiff2_add(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 11; + return; + } + s << "vabsdiff2_add check passed!\n"; + } + { + auto res = vcompare2(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 12; + return; + } + s << "vcompare2 check passed!\n"; + } + { + auto res = vcompare2_add(); + if (res.first) { + s << res.first << " = " << res.second << " check failed!\n"; + *ec = 13; + return; + } + s << "vcompare2_add check passed!\n"; + } + *ec = 0; +} + +int main() { + sycl::queue q = syclcompat::get_default_queue(); + int *ec = syclcompat::malloc(1); + syclcompat::fill(ec, 0, 1); + q.submit([&](sycl::handler &cgh) { + sycl::stream out(1024, 256, cgh); + cgh.parallel_for(1, [=](sycl::item<1> it) { test(out, ec); }); + }); + q.wait_and_throw(); + + int ec_h; + syclcompat::memcpy(&ec_h, ec, 1); + + return ec_h; +} diff --git a/sycl/test-e2e/syclcompat/math/math_extend_v.cpp b/sycl/test-e2e/syclcompat/math/math_extend_v_4.cpp similarity index 56% rename from sycl/test-e2e/syclcompat/math/math_extend_v.cpp rename to sycl/test-e2e/syclcompat/math/math_extend_v_4.cpp index 27bacc106b9e9..a2ac657000fbb 100644 --- a/sycl/test-e2e/syclcompat/math/math_extend_v.cpp +++ b/sycl/test-e2e/syclcompat/math/math_extend_v_4.cpp @@ -14,13 +14,13 @@ * * SYCLcompat API * - * math_extend_v.cpp + * math_extend_v_4.cpp * * Description: - * math extend-vectorized helpers tests + * math extend 4-vectorized helpers tests **************************************************************************/ -// ===------------- math_extend_vfunc[2/4].cpp --------------*- C++ -*-----===// +// ===------------- math_extend_vfunc_4.cpp ----------------*- C++ -*-----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -50,231 +50,6 @@ } \ } -std::pair vadd2() { - CHECK(syclcompat::extend_vadd2(0x0001FFFF, 0x00010005, 0), - 0x00020004); - CHECK(syclcompat::extend_vadd2(0x7FFF7FFF, 0x00010001, 0), - 0x80008000); - CHECK(syclcompat::extend_vadd2_sat(0x7FFF7FFF, 0x00010001, 0), - 0x7FFF7FFF); - - CHECK(syclcompat::extend_vadd2(0x00010002, 0x00020003, 0), - 0x00030005); - CHECK(syclcompat::extend_vadd2(0xFFFEFFFF, 0x00030003, 0), - 0x00010002); - CHECK(syclcompat::extend_vadd2_sat((uint32_t)0xFFFEFFFF, - (uint32_t)0x00030003, 0), - 0xFFFFFFFF); - return {nullptr, 0}; -} - -std::pair vsub2() { - - CHECK(syclcompat::extend_vsub2(0x0001FFFF, 0xFFFF0001, 0), - 0x0002FFFE); - // Testing API & Saturated API with mixed types - CHECK(syclcompat::extend_vsub2((int32_t)0x7FFFFFFD, - (int32_t)0xFFFA7FFF, 0), - 0x80057FFE); - CHECK(syclcompat::extend_vsub2((uint32_t)0x7FFFFFFD, - (uint32_t)0xFFFA7FFF, 0), - 0x80057FFE); - CHECK(syclcompat::extend_vsub2((uint32_t)0x7FFFFFFD, - (int32_t)0xFFFA7FFF, 0), - 0x80057FFE); - CHECK(syclcompat::extend_vsub2((int32_t)0x7FFFFFFD, - (uint32_t)0xFFFA7FFF, 0), - 0x80057FFE); - CHECK(syclcompat::extend_vsub2_sat((int32_t)0x7FFFFFFD, - (int32_t)0xFFFA7FFF, 0), - 0x7FFF8000); - CHECK(syclcompat::extend_vsub2_sat((uint32_t)0x7FFFFFFD, - (uint32_t)0xFFFA7FFF, 0), - 0x80057FFE); - CHECK(syclcompat::extend_vsub2_sat((int32_t)0x7FFFFFFD, - (uint32_t)0xFFFA7FFF, 0), - 0x80058000); - CHECK(syclcompat::extend_vsub2_sat((uint32_t)0x7FFFFFFD, - (int32_t)0xFFFA7FFF, 0), - 0x7FFF7FFE); - - CHECK(syclcompat::extend_vsub2(0x0002000B, 0x0001000A, 0), - 0x00010001); - CHECK(syclcompat::extend_vsub2((uint32_t)0x00010001, - (uint32_t)0x0002FFFF, 0), - 0xFFFF0002); - CHECK(syclcompat::extend_vsub2((int32_t)0x00010001, - (int32_t)0x0002FFFF, 0), - 0xFFFF0002); - CHECK(syclcompat::extend_vsub2_sat((uint32_t)0x00010001, - (uint32_t)0x0002FFFF, 0), - 0x00000000); - CHECK(syclcompat::extend_vsub2_sat((int32_t)0x00010001, - (int32_t)0x0002FFFF, 0), - 0x00000002); - - return {nullptr, 0}; -} - -std::pair vadd2_add() { - - CHECK(syclcompat::extend_vadd2_add(0x00010002, 0x00030004, 1), - 0x0000000B); - CHECK(syclcompat::extend_vadd2_add(0x0001FFFF, 0x0002FFFE, -1), - 0xFFFFFFFF); - CHECK(syclcompat::extend_vadd2_add(0x00017FFF, 0x00017FFF, 1), - 0x00010001); - - CHECK(syclcompat::extend_vadd2_add(0x00010002, 0x00030004, 1), - 0x0000000B); - CHECK(syclcompat::extend_vadd2_add((uint32_t)0x0001FFFF, - (uint32_t)0x0002FFFF, 1), - 0x00020002); - CHECK(syclcompat::extend_vadd2_add(0x0001FFFF, 0x0002FFFF, 1), - 0x00000002); - - return {nullptr, 0}; -} - -std::pair vsub2_add() { - - // Testing API with mixed types - CHECK(syclcompat::extend_vsub2_add((int32_t)0x0001FFFF, - (int32_t)0xFFFF0001, 1), - 1); - CHECK(syclcompat::extend_vsub2_add((uint32_t)0x7FFFFFFD, - (uint32_t)0xFFFA7FFF, -1), - 0x00000002); - CHECK(syclcompat::extend_vsub2_add((int32_t)0x7FFFFFFD, - (int32_t)0xFFFA7FFF, -1), - 0x00000002); - CHECK(syclcompat::extend_vsub2_add((int32_t)0x7FFFFFFD, - (uint32_t)0xFFFA7FFF, -1), - 0xFFFF0002); - CHECK(syclcompat::extend_vsub2_add((uint32_t)0x7FFFFFFD, - (int32_t)0xFFFA7FFF, -1), - 0x00010002); - - CHECK(syclcompat::extend_vsub2_add(0x0002000B, 0x0001000A, 1), - 0x00000003); - CHECK(syclcompat::extend_vsub2_add(0x00010001, 0x0002FFFF, 3), - 0x00000004); - - return {nullptr, 0}; -} - -std::pair vabsdiff2() { - - CHECK(syclcompat::extend_vabsdiff2((int32_t)0xFFFF0001, - (int32_t)0x0003FFFF, 0), - 0x00040002); - CHECK(syclcompat::extend_vabsdiff2((int32_t)0x80000002, - (int32_t)0x00010001, 0), - 0x80010001); - CHECK(syclcompat::extend_vabsdiff2_sat((int32_t)0x80000002, - (int32_t)0x00010001, 0), - 0x7FFF0001); - - CHECK(syclcompat::extend_vabsdiff2(0x00010004, 0x00030002, 0), - 0x00020002); - CHECK(syclcompat::extend_vabsdiff2((uint32_t)0xFFFF0001, - (int32_t)0xFFFE0003, 0), - 0x00010002); - CHECK(syclcompat::extend_vabsdiff2_sat((uint32_t)0xFFFF0001, - (int32_t)0xFFFE0003, 0), - 0xFFFF0002); - - return {nullptr, 0}; -} - -std::pair vabsdiff2_add() { - - CHECK(syclcompat::extend_vabsdiff2_add((int32_t)0xFFFF0001, - (int32_t)0x0003FFFF, -2), - 0x00000004); - - CHECK(syclcompat::extend_vabsdiff2_add(0x000A000C, 0x000B000A, 1), - 0x00000004); - - return {nullptr, 0}; -} - -std::pair vmin2() { - - CHECK(syclcompat::extend_vmin2((int32_t)0xFFFF0002, 0x00010001, 0), - (int32_t)0xFFFF0001); - CHECK(syclcompat::extend_vmin2_sat(0x0002FFF1, 0x0001FFF2, 0), - 0x0001FFF1); - - CHECK(syclcompat::extend_vmin2(0x000A000D, 0x000B000C, 0), - 0x000A000C); - CHECK(syclcompat::extend_vmin2_sat(0x0002FFF1, 0x0001FFF2, 0), - 0x00010000); - - return {nullptr, 0}; -} - -std::pair vmax2() { - - CHECK(syclcompat::extend_vmax2((int32_t)0xFFFF0002, 0x00010001, 0), - 0x00010002); - CHECK(syclcompat::extend_vmax2_sat(0x80008000, 0x00018001, 0), - 0x7FFF7FFF); - - CHECK(syclcompat::extend_vmax2(0x000A000D, 0x000B000C, 0), - 0x000B000D); - CHECK(syclcompat::extend_vmax2_sat(0x0002FFF1, 0x0001FFF2, 0), - 0x00020000); - - return {nullptr, 0}; -} - -std::pair vmin2_vmax2_add() { - - CHECK( - syclcompat::extend_vmin2_add((int32_t)0xFFFF0002, 0x00010001, 2), - 0x00000002); - CHECK(syclcompat::extend_vmin2_add(0x000A000D, 0x000B000C, 2), - 0x00000018); - - CHECK(syclcompat::extend_vmax2_add((int32_t)0xFFFF0002, 0x00010001, - -2), - 0x00000001); - CHECK(syclcompat::extend_vmax2_add(0x000A000D, 0x000B000C, 2), - 0x0000001A); - - return {nullptr, 0}; -} - -std::pair vavrg2() { - - CHECK(syclcompat::extend_vavrg2((int32_t)0xFFFFFFF6, 0x0005FFFA, 0), - 0x0002FFF8); - CHECK(syclcompat::extend_vavrg2_sat((int32_t)0xFFFFFFF6, 0x0005FFFA, - 0), - 0x0002FFF8); - - CHECK(syclcompat::extend_vavrg2(0x00010006, 0x00030001, 0), - 0x00020004); - CHECK(syclcompat::extend_vavrg2_sat(0x00010006, 0x00030001, 0), - 0x00020004); - - return {nullptr, 0}; -} - -std::pair vavrg2_add() { - - CHECK(syclcompat::extend_vavrg2_add((int32_t)0xFFFFFFF6, 0x0005FFFA, - -2), - 0xFFFFFFF8); - - CHECK(syclcompat::extend_vavrg2_add(0x00010006, 0x00030002, 2), - 0x00000008); - - return {nullptr, 0}; -} - -// v4 std::pair vadd4() { CHECK(syclcompat::extend_vadd4(0x0102FFFE, 0x01FF02FF, 0), 0x020101FD); @@ -514,204 +289,175 @@ std::pair vavrg4_add() { return {nullptr, 0}; } +std::pair vcompare4() { + + CHECK(syclcompat::extend_vcompare4(0x0102FEFF, 0x01FFFFFE, std::greater<>()), + (unsigned)0x00010001); + CHECK(syclcompat::extend_vcompare4((uint32_t)0x0102FEFF, (int32_t)0x01FFFFFE, + std::greater<>()), + (unsigned)0x00010101); + CHECK(syclcompat::extend_vcompare4((int32_t)0x0102FEFF, (uint32_t)0x01FFFFFE, + std::greater<>()), + (unsigned)0x00000000); + + CHECK(syclcompat::extend_vcompare4(0x0102FEFF, 0x01FFFFFE, std::less<>()), + (unsigned)0x00000100); + CHECK(syclcompat::extend_vcompare4(0x0102FEFF, 0x01FFFFFE, + std::greater_equal<>()), + (unsigned)0x01010001); + CHECK( + syclcompat::extend_vcompare4(0x0102FEFF, 0x01FFFFFE, std::less_equal<>()), + (unsigned)0x01000100); + CHECK(syclcompat::extend_vcompare4(0xFFFE0102, 0xFFFF0202, std::equal_to<>()), + (unsigned)0x01000001); + CHECK(syclcompat::extend_vcompare4(0xFFFE0102, 0xFFFF0202, + std::not_equal_to<>()), + (unsigned)0x00010100); + + return {nullptr, 0}; +} + +std::pair vcompare4_add() { + + CHECK(syclcompat::extend_vcompare4_add(0x0102FEFF, 0x01FFFFFE, 1, + std::greater<>()), + (unsigned)0x00000003); + CHECK(syclcompat::extend_vcompare4_add(0x0102FEFF, 0x01FFFFFE, 1, + std::less<>()), + (unsigned)0x00000002); + CHECK(syclcompat::extend_vcompare4_add(0x0102FEFF, 0x01FFFFFE, 2, + std::greater_equal<>()), + (unsigned)0x00000005); + CHECK(syclcompat::extend_vcompare4_add(0x0102FEFF, 0x01FFFFFE, 2, + std::less_equal<>()), + (unsigned)0x00000004); + CHECK(syclcompat::extend_vcompare4_add(0xFFFE0102, 0xFFFF0202, 0xFF, + std::equal_to<>()), + (unsigned)0x00000101); + CHECK(syclcompat::extend_vcompare4_add(0xFFFE0102, 0xFFFF0202, 0xFFFF, + std::not_equal_to<>()), + (unsigned)0x00010001); + + return {nullptr, 0}; +} + void test(const sycl::stream &s, int *ec) { { - auto res = vadd2(); + auto res = vadd4(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 1; return; } - s << "vadd2 check passed!\n"; + s << "vadd4 check passed!\n"; } { - auto res = vsub2(); + auto res = vsub4(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 2; return; } - s << "vsub2 check passed!\n"; + s << "vsub4 check passed!\n"; } { - auto res = vadd2_add(); + auto res = vadd4_add(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 3; return; } - s << "vadd2_add check passed!\n"; + s << "vadd4_add check passed!\n"; } { - auto res = vsub2_add(); + auto res = vsub4_add(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 4; return; } - s << "vsub2_add check passed!\n"; + s << "vsub4_add check passed!\n"; } { - auto res = vabsdiff2(); + auto res = vabsdiff4(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 5; return; } - s << "vabsdiff2 check passed!\n"; + s << "vabsdiff4 check passed!\n"; } { - auto res = vmin2(); + auto res = vabsdiff4_add(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 6; return; } - s << "vmin2 check passed!\n"; + s << "vabsdiff4_add check passed!\n"; } { - auto res = vmax2(); + auto res = vmin4(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 7; return; } - s << "vmax2 check passed!\n"; + s << "vmin4 check passed!\n"; } { - auto res = vmin2_vmax2_add(); + auto res = vmax4(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 8; return; } - s << "vmin2_add/vmax2_add check passed!\n"; + s << "vmax4 check passed!\n"; } { - auto res = vavrg2(); + auto res = vmin4_vmax4_add(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 9; return; } - s << "vavrg2 check passed!\n"; + s << "vmin4_add/vmax4_add check passed!\n"; } { - auto res = vavrg2_add(); + auto res = vavrg4(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 10; return; } - s << "vavrg2_add check passed!\n"; + s << "vavrg4 check passed!\n"; } { - auto res = vabsdiff2_add(); + auto res = vavrg4_add(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 11; return; } - s << "vabsdiff2_add check passed!\n"; + s << "vavrg4_add check passed!\n"; } { - auto res = vadd4(); + auto res = vcompare4(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 12; return; } - s << "vadd4 check passed!\n"; + s << "vcompare4 check passed!\n"; } { - auto res = vsub4(); + auto res = vcompare4_add(); if (res.first) { s << res.first << " = " << res.second << " check failed!\n"; *ec = 13; return; } - s << "vsub4 check passed!\n"; - } - { - auto res = vadd4_add(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 14; - return; - } - s << "vadd4_add check passed!\n"; - } - { - auto res = vsub4_add(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 15; - return; - } - s << "vsub4_add check passed!\n"; - } - { - auto res = vabsdiff4(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 16; - return; - } - s << "vabsdiff4 check passed!\n"; - } - { - auto res = vabsdiff4_add(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 17; - return; - } - s << "vabsdiff4_add check passed!\n"; - } - { - auto res = vmin4(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 18; - return; - } - s << "vmin4 check passed!\n"; - } - { - auto res = vmax4(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 19; - return; - } - s << "vmax4 check passed!\n"; - } - { - auto res = vmin4_vmax4_add(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 20; - return; - } - s << "vmin4_add/vmax4_add check passed!\n"; - } - { - auto res = vavrg4(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 21; - return; - } - s << "vavrg4 check passed!\n"; - } - { - auto res = vavrg4_add(); - if (res.first) { - s << res.first << " = " << res.second << " check failed!\n"; - *ec = 22; - return; - } - s << "vavrg4_add check passed!\n"; + s << "vcompare4_add check passed!\n"; } *ec = 0; }