@@ -573,6 +573,52 @@ namespace xsimd
             return vld1q_f32(src);
         }

+        /* batch bool version */
+        template <class T, class A, detail::enable_sized_t<T, 1> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
+        {
+            auto vmem = load_unaligned<A>((unsigned char const*)mem, convert<unsigned char> {}, A {});
+            return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmem).data);
+        }
+        template <class T, class A, detail::enable_sized_t<T, 1> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
+        {
+            return load_unaligned(mem, t, r);
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 2> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
+        {
+            batch<unsigned short, neon> vmem = vmovl_u8(vld1_u8((unsigned char const*)mem));
+            return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmem).data);
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 2> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
+        {
+            return load_unaligned(mem, t, r);
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 4> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
+        {
+            uint32x2_t tmp = vset_lane_u32(*(unsigned int*)mem, vdup_n_u32(0), 0);
+            return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(tmp))))));
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 4> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
+        {
+            return load_unaligned(mem, t, r);
+        }
+
+        template <class A>
+        XSIMD_INLINE batch_bool<float, A> load_aligned(bool const* mem, batch_bool<float, A> t, requires_arch<neon> r) noexcept
+        {
+            uint32x2_t tmp = vset_lane_u32(*(unsigned int*)mem, vdup_n_u32(0), 0);
+            return batch_bool<float, A>(0 - vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(tmp)))));
+        }
+
         /*********
          * store *
          *********/
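These kernels all follow the same pattern: load the bool bytes, widen them to the element width of the batch, and negate (0 - x), so a true byte (stored as 1) becomes an all-ones lane mask. Below is a minimal usage sketch, assuming a NEON-enabled build and that the public xsimd::batch_bool<float>::load_unaligned(bool const*) entry point dispatches to the kernels above; everything outside the diff is illustrative, not part of the patch.

    #include <xsimd/xsimd.hpp>

    #include <array>
    #include <cstddef>
    #include <cstdio>

    int main()
    {
        constexpr std::size_t n = xsimd::batch<float>::size;

        // One bool per SIMD lane; alternate true/false.
        std::array<bool, n> flags {};
        for (std::size_t i = 0; i < n; ++i)
            flags[i] = (i % 2 == 0);

        // Expected to route to the bool-load kernels above on a NEON target.
        auto mask = xsimd::batch_bool<float>::load_unaligned(flags.data());

        // The resulting mask can drive a lane-wise blend.
        xsimd::batch<float> a(1.0f), b(-1.0f);
        auto blended = xsimd::select(mask, a, b);

        std::array<float, n> out {};
        blended.store_unaligned(out.data());
        for (float v : out)
            std::printf("%g ", v); // e.g. "1 -1 1 -1" with 4 float lanes
        std::printf("\n");
    }

The 0 - x trick works because bool values convert to exactly 0 or 1 as integers, so the subtraction yields either 0x00...0 or 0xFF...F per lane, which is what batch_bool expects in its register.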