Skip to content

Commit 6ad5b3d

Browse files
feat: add u8x16::unpack_low/high (#147)
This commit adds u8x16::unpack_low and u8x16::unpack_high, which are essentially SSE2's unpack_low_i8_m128i and unpack_high_i8_m128i, but also support the SIMD128 and NEON backends.
1 parent 86ff930 commit 6ad5b3d

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

src/u8x16_.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,62 @@ impl u8x16 {
459459
}
460460
}
461461

462+
/// Unpack and interleave low lanes of two u8x16
463+
pub fn unpack_low(lhs: u8x16, rhs: u8x16) -> u8x16 {
464+
pick! {
465+
if #[cfg(target_feature = "sse2")] {
466+
u8x16 { sse: unpack_low_i8_m128i(lhs.sse, rhs.sse) }
467+
} else if #[cfg(target_feature = "simd128")] {
468+
u8x16 { simd: u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(lhs.simd, rhs.simd) }
469+
} else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
470+
let lhs = unsafe { vget_low_u8(lhs.neon) };
471+
let rhs = unsafe { vget_low_u8(rhs.neon) };
472+
473+
let zipped = unsafe { vzip_u8(lhs, rhs) };
474+
u8x16 { neon: unsafe { vcombine_u8(zipped.0, zipped.1) } }
475+
} else {
476+
u8x16::new([
477+
lhs.as_array_ref()[0], rhs.as_array_ref()[0],
478+
lhs.as_array_ref()[1], rhs.as_array_ref()[1],
479+
lhs.as_array_ref()[2], rhs.as_array_ref()[2],
480+
lhs.as_array_ref()[3], rhs.as_array_ref()[3],
481+
lhs.as_array_ref()[4], rhs.as_array_ref()[4],
482+
lhs.as_array_ref()[5], rhs.as_array_ref()[5],
483+
lhs.as_array_ref()[6], rhs.as_array_ref()[6],
484+
lhs.as_array_ref()[7], rhs.as_array_ref()[7],
485+
])
486+
}
487+
}
488+
}
489+
490+
/// Unpack and interleave high lanes of two u8x16
491+
pub fn unpack_high(lhs: u8x16, rhs: u8x16) -> u8x16 {
492+
pick! {
493+
if #[cfg(target_feature = "sse2")] {
494+
u8x16 { sse: unpack_high_i8_m128i(lhs.sse, rhs.sse) }
495+
} else if #[cfg(target_feature = "simd128")] {
496+
u8x16 { simd: u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(lhs.simd, rhs.simd) }
497+
} else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
498+
let lhs = unsafe { vget_high_u8(lhs.neon) };
499+
let rhs = unsafe { vget_high_u8(rhs.neon) };
500+
501+
let zipped = unsafe { vzip_u8(lhs, rhs) };
502+
u8x16 { neon: unsafe { vcombine_u8(zipped.0, zipped.1) } }
503+
} else {
504+
u8x16::new([
505+
lhs.as_array_ref()[8], rhs.as_array_ref()[8],
506+
lhs.as_array_ref()[9], rhs.as_array_ref()[9],
507+
lhs.as_array_ref()[10], rhs.as_array_ref()[10],
508+
lhs.as_array_ref()[11], rhs.as_array_ref()[11],
509+
lhs.as_array_ref()[12], rhs.as_array_ref()[12],
510+
lhs.as_array_ref()[13], rhs.as_array_ref()[13],
511+
lhs.as_array_ref()[14], rhs.as_array_ref()[14],
512+
lhs.as_array_ref()[15], rhs.as_array_ref()[15],
513+
])
514+
}
515+
}
516+
}
517+
462518
#[inline]
463519
pub fn to_array(self) -> [u8; 16] {
464520
cast(self)

tests/all_tests/t_u8x16.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,3 +163,19 @@ fn impl_u8x16_min() {
163163
let actual = a.min(b);
164164
assert_eq!(expected, actual);
165165
}
166+
167+
#[test]
fn impl_unpack_low_u8() {
  // The low halves (lanes 0..=7) of both inputs are interleaved, `lhs` first.
  let lhs = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
  let rhs = u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);

  let expected: [u8; 16] = [0, 12, 1, 11, 2, 22, 3, 13, 4, 99, 5, 15, 6, 16, 7, 17];
  let actual: [u8; 16] = u8x16::unpack_low(lhs, rhs).into();

  assert_eq!(actual, expected);
}
174+
175+
#[test]
fn impl_unpack_high_u8() {
  // The high halves (lanes 8..=15) of both inputs are interleaved, `lhs` first.
  let lhs = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
  let rhs = u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);

  let expected: [u8; 16] = [8, 8, 9, 19, 10, 2, 11, 21, 12, 22, 13, 3, 14, 24, 15, 127];
  let actual: [u8; 16] = u8x16::unpack_high(lhs, rhs).into();

  assert_eq!(actual, expected);
}

0 commit comments

Comments
 (0)