Skip to content

Commit

Permalink
Add q7x16 and q15x8 for ARM NEON.
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiashienzsch committed Jan 24, 2024
1 parent d15fe71 commit 819f428
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 13 deletions.
27 changes: 15 additions & 12 deletions extra/benchmark/src/multiply_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,31 +157,34 @@ auto batch_split_multiply_add(benchmark::State& state) -> void

} // namespace

BENCHMARK(multiply_add<std::complex<float>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
BENCHMARK(multiply_add<std::complex<double>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(multiply_add<std::complex<float>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(multiply_add<std::complex<double>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);

#if defined(NEO_HAS_BUILTIN_FLOAT16)
BENCHMARK(multiply_add<neo::complex32>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
#endif
BENCHMARK(multiply_add<neo::complex64>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
BENCHMARK(multiply_add<neo::complex128>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(multiply_add<neo::complex64>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(multiply_add<neo::complex128>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);

#if defined(NEO_HAS_BUILTIN_FLOAT16)
BENCHMARK(split_multiply_add<_Float16>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
#endif
BENCHMARK(split_multiply_add<float>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
BENCHMARK(split_multiply_add<double>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(split_multiply_add<float>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(split_multiply_add<double>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);

#if defined(NEO_HAS_XSIMD)
BENCHMARK(batch_split_multiply_add<xsimd::batch<float>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
BENCHMARK(batch_split_multiply_add<xsimd::batch<double>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
#endif
// #if defined(NEO_HAS_XSIMD)
// BENCHMARK(batch_split_multiply_add<xsimd::batch<float>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(batch_split_multiply_add<xsimd::batch<double>>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// #endif

BENCHMARK(split_multiply_add<neo::q7>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
// BENCHMARK(split_multiply_add<neo::q7>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
BENCHMARK(split_multiply_add<neo::q15>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);

#if defined(NEO_HAS_ISA_AVX2)
#if defined(NEO_HAS_ISA_NEON) or defined(NEO_HAS_ISA_AVX2)
BENCHMARK(batch_split_multiply_add<neo::q15x8>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
#endif

#if defined(NEO_HAS_ISA_AVX2)
BENCHMARK(batch_split_multiply_add<neo::q15x16>)->RangeMultiplier(2)->Range(1 << 7, 1 << 24);
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/neo/fixed_point/fixed_point_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ static auto test_simd_fixed_point()
}
}

#if defined(NEO_HAS_ISA_SSE2)
#if defined(NEO_HAS_ISA_SSE2) or defined(NEO_HAS_ISA_NEON)

TEMPLATE_TEST_CASE("neo/fixed_point: batch", "", neo::q7x16, neo::q15x8)
{
Expand Down
101 changes: 101 additions & 0 deletions src/neo/fixed_point/simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,107 @@ inline constexpr auto const mul_kernel_avx512_s16 = [](__m512i lhs, __m512i rhs)

} // namespace detail

#if defined(NEO_HAS_ISA_NEON)

struct alignas(16) q7x16
{
using value_type = neo::q7;
using register_type = int8x16_t;

static constexpr auto const alignment = alignof(register_type);
static constexpr auto const size = sizeof(register_type) / sizeof(value_type);

q7x16() = default;

NEO_ALWAYS_INLINE q7x16(register_type reg) noexcept : _reg{reg} {}

[[nodiscard]] NEO_ALWAYS_INLINE explicit operator register_type() const noexcept { return _reg; }

[[nodiscard]] static auto broadcast(value_type val) noexcept -> q7x16
{
auto const v = val.value();
return vld1q_dup_s8(&v);
}

[[nodiscard]] static auto load_unaligned(value_type const* input) noexcept -> q7x16
{
auto const* integer = reinterpret_cast<value_type::storage_type const*>(input);
return vld1q_s8(integer);
}

auto store_unaligned(value_type* output) const noexcept -> void
{
auto* integer = reinterpret_cast<value_type::storage_type*>(output);
return vst1q_s8(integer, _reg);
}

NEO_ALWAYS_INLINE friend auto operator+(q7x16 lhs, q7x16 rhs) noexcept -> q7x16
{
return vqaddq_s8(static_cast<register_type>(lhs), static_cast<register_type>(rhs));
}

NEO_ALWAYS_INLINE friend auto operator-(q7x16 lhs, q7x16 rhs) noexcept -> q7x16
{
return vqsubq_s8(static_cast<register_type>(lhs), static_cast<register_type>(rhs));
}

private:
register_type _reg;
};

struct alignas(16) q15x8
{
using value_type = neo::q15;
using register_type = int16x8_t;

static constexpr auto const alignment = alignof(register_type);
static constexpr auto const size = sizeof(register_type) / sizeof(value_type);

q15x8() = default;

NEO_ALWAYS_INLINE q15x8(register_type reg) noexcept : _reg{reg} {}

[[nodiscard]] NEO_ALWAYS_INLINE explicit operator register_type() const noexcept { return _reg; }

[[nodiscard]] static auto broadcast(value_type val) noexcept -> q15x8
{
auto const v = val.value();
return vld1q_dup_s16(&v);
}

[[nodiscard]] static auto load_unaligned(value_type const* input) noexcept -> q15x8
{
auto const* integer = reinterpret_cast<value_type::storage_type const*>(input);
return vld1q_s16(integer);
}

auto store_unaligned(value_type* output) const noexcept -> void
{
auto* integer = reinterpret_cast<value_type::storage_type*>(output);
return vst1q_s16(integer, _reg);
}

NEO_ALWAYS_INLINE friend auto operator+(q15x8 lhs, q15x8 rhs) noexcept -> q15x8
{
return vqaddq_s16(static_cast<register_type>(lhs), static_cast<register_type>(rhs));
}

NEO_ALWAYS_INLINE friend auto operator-(q15x8 lhs, q15x8 rhs) noexcept -> q15x8
{
return vqsubq_s16(static_cast<register_type>(lhs), static_cast<register_type>(rhs));
}

NEO_ALWAYS_INLINE friend auto operator*(q15x8 lhs, q15x8 rhs) noexcept -> q15x8
{
return vqdmulhq_s16(static_cast<register_type>(lhs), static_cast<register_type>(rhs));
}

private:
register_type _reg;
};

#endif

#if defined(NEO_HAS_ISA_SSE2)

struct alignas(16) q7x16
Expand Down

0 comments on commit 819f428

Please sign in to comment.