Skip to content

Commit dfff342

Browse files
Optimize loading of batch_bool from memory on arm
Use the same approach as #1172
1 parent 005f629 commit dfff342

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,32 @@ namespace xsimd
573573
return vld1q_f32(src);
574574
}
575575

576+
/* batch bool version */
577+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
578+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
579+
{
580+
auto vmem = load_unaligned<A>((unsigned char const*)mem, convert<unsigned char> {}, A {});
581+
return bitwise_cast<T>(0 - vmem);
582+
}
583+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
584+
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
585+
{
586+
return load_unaligned(mem, t, r);
587+
}
588+
589+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
590+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
591+
{
592+
auto vmem = vmovl_u8(vld1_u8((unsigned char const*)mem));
593+
return bitwise_cast<T>(0 - vmem);
594+
}
595+
596+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
597+
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
598+
{
599+
return load_unaligned(mem, t, r);
600+
}
601+
576602
/*********
577603
* store *
578604
*********/

0 commit comments

Comments
 (0)