From db271b07450b382d0d00ed1a8b3c80d8aa39cec9 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sat, 3 Jan 2026 09:45:22 +0100 Subject: [PATCH 1/9] Implement `store_array` methods --- fearless_simd/src/generated/avx2.rs | 180 ++++++++++++++++ fearless_simd/src/generated/fallback.rs | 144 +++++++++++++ fearless_simd/src/generated/neon.rs | 144 +++++++++++++ fearless_simd/src/generated/simd_trait.rs | 72 +++++++ fearless_simd/src/generated/sse4_2.rs | 240 ++++++++++++++++++++++ fearless_simd/src/generated/wasm.rs | 240 ++++++++++++++++++++++ fearless_simd_gen/src/arch/neon.rs | 12 +- fearless_simd_gen/src/generic.rs | 46 ++++- fearless_simd_gen/src/mk_fallback.rs | 7 + fearless_simd_gen/src/mk_neon.rs | 8 +- fearless_simd_gen/src/mk_wasm.rs | 5 +- fearless_simd_gen/src/mk_x86.rs | 5 +- fearless_simd_gen/src/ops.rs | 25 ++- fearless_simd_tests/tests/harness/mod.rs | 203 ++++++++++++++++++ 14 files changed, 1321 insertions(+), 10 deletions(-) diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs index bab26fde..f09e03d0 100644 --- a/fearless_simd/src/generated/avx2.rs +++ b/fearless_simd/src/generated/avx2.rs @@ -128,6 +128,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128, &mut [f32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { + unsafe { _mm_storeu_ps(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { unsafe { f32x4 { @@ -391,6 +395,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { unsafe { i8x16 { @@ -582,6 +590,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [u8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { unsafe { u8x16 { @@ -779,6 +791,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { unsafe { mask8x16 { @@ -876,6 +892,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { unsafe { i16x8 { @@ -1042,6 +1062,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [u16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { unsafe { u16x8 { @@ -1214,6 +1238,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i16; 8usize]>(&mut 
a.val.0) } } #[inline(always)] + fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { unsafe { mask16x8 { @@ -1311,6 +1339,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { unsafe { i32x4 { @@ -1479,6 +1511,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [u32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { unsafe { u32x4 { @@ -1659,6 +1695,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { unsafe { mask32x4 { @@ -1756,6 +1796,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128d, &mut [f64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { + unsafe { _mm_storeu_pd(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { unsafe { f64x2 { @@ -1948,6 +1992,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { unsafe { mask64x2 { @@ -2045,6 +2093,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256, &mut [f32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { + unsafe { _mm256_storeu_ps(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8 { unsafe { f32x8 { @@ -2348,6 +2400,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [i8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32 { unsafe { i8x32 { @@ -2579,6 +2635,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [u8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32 { unsafe { u8x32 { @@ -2821,6 +2881,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [i8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 
32usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { unsafe { mask8x32 { @@ -2934,6 +2998,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16 { unsafe { i16x16 { @@ -3142,6 +3210,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [u16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16 { unsafe { u16x16 { @@ -3368,6 +3440,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { unsafe { mask16x16 { @@ -3481,6 +3557,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8 { unsafe { i32x8 { @@ -3681,6 +3761,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [u32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8 { unsafe { u32x8 { @@ -3896,6 +3980,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { unsafe { mask32x8 { @@ -4005,6 +4093,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256d, &mut [f64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { + unsafe { _mm256_storeu_pd(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4 { unsafe { f64x4 { @@ -4237,6 +4329,10 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut __m256i, &mut [i64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { + unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { unsafe { mask64x4 { @@ -4357,6 +4453,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256; 2usize], &mut [f32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { + unsafe { + 
_mm256_storeu_ps(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_ps(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_f32x16(self, a: u8x64) -> f32x16 { unsafe { f32x16 { @@ -4713,6 +4816,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i8x64(self, a: u8x64) -> i8x64 { unsafe { i8x64 { @@ -4939,6 +5049,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [u8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u8x64(self, a: u8x64) -> u8x64 { unsafe { u8x64 { @@ -5210,6 +5327,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { unsafe { mask8x64 { @@ -5346,6 +5470,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i16x32(self, a: u8x64) -> i16x32 { unsafe { i16x32 { @@ -5581,6 +5712,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [u16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u16x32(self, a: u8x64) -> u16x32 { unsafe { u16x32 { @@ -5879,6 +6017,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { unsafe { mask16x32 { @@ -6018,6 +6163,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + 
_mm256_storeu_si256(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i32x16(self, a: u8x64) -> i32x16 { unsafe { i32x16 { @@ -6249,6 +6401,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [u32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u32x16(self, a: u8x64) -> u32x16 { unsafe { u32x16 { @@ -6512,6 +6671,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { unsafe { mask32x16 { @@ -6648,6 +6814,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256d; 2usize], &mut [f64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { + unsafe { + _mm256_storeu_pd(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_pd(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_f64x8(self, a: u8x64) -> f64x8 { unsafe { f64x8 { @@ -6912,6 +7085,13 @@ impl Simd for Avx2 { unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { + unsafe { + _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm256_storeu_si256(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { unsafe { mask64x8 { diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index 1701a140..c2f39a1b 100644 --- a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -181,6 +181,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { unsafe { f32x4 { @@ -534,6 +538,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { unsafe { i8x16 { @@ -1096,6 +1104,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { unsafe { u8x16 { @@ -1654,6 +1666,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { unsafe { mask8x16 { @@ -1946,6 +1962,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn 
store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { unsafe { i16x8 { @@ -2317,6 +2337,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { unsafe { u16x8 { @@ -2674,6 +2698,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { unsafe { mask16x8 { @@ -2862,6 +2890,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { unsafe { i32x4 { @@ -3151,6 +3183,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { unsafe { u32x4 { @@ -3426,6 +3462,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { unsafe { mask32x4 { @@ -3562,6 +3602,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { unsafe { f64x2 { @@ -3801,6 +3845,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { unsafe { mask64x2 { @@ -3922,6 +3970,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8 { unsafe { f32x8 { @@ -4225,6 +4277,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32 { unsafe { i8x32 { @@ -4443,6 +4499,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32 { unsafe { u8x32 { @@ -4656,6 +4716,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { unsafe { mask8x32 { @@ -4784,6 +4848,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16 { unsafe { i16x16 { @@ -5002,6 +5070,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u16x16(self, a: u16x16, dest: 
&mut [u16; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16 { unsafe { u16x16 { @@ -5237,6 +5309,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { unsafe { mask16x16 { @@ -5365,6 +5441,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8 { unsafe { i32x8 { @@ -5588,6 +5668,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8 { unsafe { u32x8 { @@ -5798,6 +5882,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { unsafe { mask32x8 { @@ -5926,6 +6014,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4 { unsafe { f64x4 { @@ -6182,6 +6274,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { unsafe { mask64x4 { @@ -6310,6 +6406,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_f32x16(self, a: u8x64) -> f32x16 { unsafe { f32x16 { @@ -6636,6 +6736,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i8x64(self, a: u8x64) -> i8x64 { unsafe { i8x64 { @@ -6847,6 +6951,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u8x64(self, a: u8x64) -> u8x64 { unsafe { u8x64 { @@ -7133,6 +7241,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { unsafe { mask8x64 { @@ -7254,6 +7366,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i16x32(self, a: u8x64) -> i16x32 { unsafe { i16x32 { @@ -7474,6 +7590,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u16x32(self, a: u8x64) -> u16x32 { unsafe { u16x32 { @@ -7742,6 +7862,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 
32usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { unsafe { mask16x32 { @@ -7866,6 +7990,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_i32x16(self, a: u8x64) -> i32x16 { unsafe { i32x16 { @@ -8082,6 +8210,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_u32x16(self, a: u8x64) -> u32x16 { unsafe { u32x16 { @@ -8315,6 +8447,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { unsafe { mask32x16 { @@ -8436,6 +8572,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_f64x8(self, a: u8x64) -> f64x8 { unsafe { f64x8 { @@ -8685,6 +8825,10 @@ impl Simd for Fallback { &mut a.val.0 } #[inline(always)] + fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { + *dest = a.val.0; + } + #[inline(always)] fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { unsafe { mask64x8 { diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index 8e432c59..def528dd 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -120,6 +120,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut float32x4_t, &mut [f32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { + unsafe { vst1q_f32(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { unsafe { f32x4 { @@ -338,6 +342,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int8x16_t, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { vst1q_s8(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { unsafe { i8x16 { @@ -505,6 +513,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint8x16_t, &mut [u8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { + unsafe { vst1q_u8(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { unsafe { u8x16 { @@ -672,6 +684,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int8x16_t, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { vst1q_s8(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { unsafe { mask8x16 { @@ -772,6 +788,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int16x8_t, &mut [i16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { vst1q_s16(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i16x8(self, 
a: u8x16) -> i16x8 { unsafe { i16x8 { @@ -939,6 +959,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint16x8_t, &mut [u16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { + unsafe { vst1q_u16(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { unsafe { u16x8 { @@ -1102,6 +1126,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int16x8_t, &mut [i16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { vst1q_s16(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { unsafe { mask16x8 { @@ -1202,6 +1230,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int32x4_t, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { vst1q_s32(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { unsafe { i32x4 { @@ -1373,6 +1405,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint32x4_t, &mut [u32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { + unsafe { vst1q_u32(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { unsafe { u32x4 { @@ -1536,6 +1572,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int32x4_t, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { vst1q_s32(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { unsafe { mask32x4 { @@ -1636,6 +1676,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut float64x2_t, &mut [f64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { + unsafe { vst1q_f64(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { unsafe { f64x2 { @@ -1826,6 +1870,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int64x2_t, &mut [i64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { + unsafe { vst1q_s64(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { unsafe { mask64x2 { @@ -1927,6 +1975,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut float32x4x2_t, &mut [f32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { + unsafe { vst1q_f32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8 { unsafe { f32x8 { @@ -2237,6 +2289,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int8x16x2_t, &mut [i8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { + unsafe { vst1q_s8_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32 { unsafe { i8x32 { @@ -2462,6 +2518,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut 
uint8x16x2_t, &mut [u8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { + unsafe { vst1q_u8_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32 { unsafe { u8x32 { @@ -2682,6 +2742,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int8x16x2_t, &mut [i8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { + unsafe { vst1q_s8_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { unsafe { mask8x32 { @@ -2817,6 +2881,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int16x8x2_t, &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { vst1q_s16_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16 { unsafe { i16x16 { @@ -3042,6 +3110,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint16x8x2_t, &mut [u16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> () { + unsafe { vst1q_u16_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16 { unsafe { u16x16 { @@ -3271,6 +3343,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int16x8x2_t, &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { vst1q_s16_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { unsafe { mask16x16 { @@ -3406,6 +3482,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int32x4x2_t, &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { + unsafe { vst1q_s32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8 { unsafe { i32x8 { @@ -3636,6 +3716,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint32x4x2_t, &mut [u32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { + unsafe { vst1q_u32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8 { unsafe { u32x8 { @@ -3853,6 +3937,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int32x4x2_t, &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { + unsafe { vst1q_s32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { unsafe { mask32x8 { @@ -3988,6 +4076,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut float64x2x2_t, &mut [f64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { + unsafe { vst1q_f64_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4 { unsafe { f64x4 { @@ -4251,6 +4343,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int64x2x2_t, &mut [i64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn 
store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { + unsafe { vst1q_s64_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { unsafe { mask64x4 { @@ -4386,6 +4482,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut float32x4x4_t, &mut [f32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { + unsafe { vst1q_f32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f32x16(self, a: u8x64) -> f32x16 { unsafe { f32x16 { @@ -4695,6 +4795,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int8x16x4_t, &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { vst1q_s8_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i8x64(self, a: u8x64) -> i8x64 { unsafe { i8x64 { @@ -4911,6 +5015,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint8x16x4_t, &mut [u8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { + unsafe { vst1q_u8_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u8x64(self, a: u8x64) -> u8x64 { unsafe { u8x64 { @@ -5125,6 +5233,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int8x16x4_t, &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { vst1q_s8_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { unsafe { mask8x64 { @@ -5251,6 +5363,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int16x8x4_t, &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { vst1q_s16_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i16x32(self, a: u8x64) -> i16x32 { unsafe { i16x32 { @@ -5476,6 +5592,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint16x8x4_t, &mut [u16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { + unsafe { vst1q_u16_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u16x32(self, a: u8x64) -> u16x32 { unsafe { u16x32 { @@ -5709,6 +5829,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int16x8x4_t, &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { vst1q_s16_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { unsafe { mask16x32 { @@ -5838,6 +5962,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int32x4x4_t, &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { vst1q_s32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i32x16(self, a: u8x64) -> i32x16 { unsafe { i32x16 { @@ -6059,6 +6187,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut uint32x4x4_t, &mut [u32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 
16usize]) -> () { + unsafe { vst1q_u32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u32x16(self, a: u8x64) -> u32x16 { unsafe { u32x16 { @@ -6275,6 +6407,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int32x4x4_t, &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { vst1q_s32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { unsafe { mask32x16 { @@ -6401,6 +6537,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut float64x2x4_t, &mut [f64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { + unsafe { vst1q_f64_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f64x8(self, a: u8x64) -> f64x8 { unsafe { f64x8 { @@ -6655,6 +6795,10 @@ impl Simd for Neon { unsafe { core::mem::transmute::<&mut int64x2x4_t, &mut [i64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { + unsafe { vst1q_s64_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { unsafe { mask64x8 { diff --git a/fearless_simd/src/generated/simd_trait.rs b/fearless_simd/src/generated/simd_trait.rs index cba1593b..73ea9aa0 100644 --- a/fearless_simd/src/generated/simd_trait.rs +++ b/fearless_simd/src/generated/simd_trait.rs @@ -132,6 +132,8 @@ pub trait Simd: fn as_array_ref_f32x4(self, a: &f32x4) -> &[f32; 4usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_f32x4(self, a: &mut f32x4) -> &mut [f32; 4usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -224,6 +226,8 @@ pub trait Simd: fn as_array_ref_i8x16(self, a: &i8x16) -> &[i8; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i8x16(self, a: &mut i8x16) -> &mut [i8; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -294,6 +298,8 @@ pub trait Simd: fn as_array_ref_u8x16(self, a: &u8x16) -> &[u8; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u8x16(self, a: &mut u8x16) -> &mut [u8; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -362,6 
+368,8 @@ pub trait Simd: fn as_array_ref_mask8x16(self, a: &mask8x16) -> &[i8; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask8x16(self, a: &mut mask8x16) -> &mut [i8; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -405,6 +413,8 @@ pub trait Simd: fn as_array_ref_i16x8(self, a: &i16x8) -> &[i16; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i16x8(self, a: &mut i16x8) -> &mut [i16; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -475,6 +485,8 @@ pub trait Simd: fn as_array_ref_u16x8(self, a: &u16x8) -> &[u16; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u16x8(self, a: &mut u16x8) -> &mut [u16; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -543,6 +555,8 @@ pub trait Simd: fn as_array_ref_mask16x8(self, a: &mask16x8) -> &[i16; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask16x8(self, a: &mut mask16x8) -> &mut [i16; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -586,6 +600,8 @@ pub trait Simd: fn as_array_ref_i32x4(self, a: &i32x4) -> &[i32; 4usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i32x4(self, a: &mut i32x4) -> &mut [i32; 4usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -658,6 +674,8 @@ pub trait Simd: fn as_array_ref_u32x4(self, a: &u32x4) -> &[u32; 4usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u32x4(self, a: &mut u32x4) -> &mut 
[u32; 4usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -726,6 +744,8 @@ pub trait Simd: fn as_array_ref_mask32x4(self, a: &mask32x4) -> &[i32; 4usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask32x4(self, a: &mut mask32x4) -> &mut [i32; 4usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -769,6 +789,8 @@ pub trait Simd: fn as_array_ref_f64x2(self, a: &f64x2) -> &[f64; 2usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_f64x2(self, a: &mut f64x2) -> &mut [f64; 2usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -847,6 +869,8 @@ pub trait Simd: fn as_array_ref_mask64x2(self, a: &mask64x2) -> &[i64; 2usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask64x2(self, a: &mut mask64x2) -> &mut [i64; 2usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -890,6 +914,8 @@ pub trait Simd: fn as_array_ref_f32x8(self, a: &f32x8) -> &[f32; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_f32x8(self, a: &mut f32x8) -> &mut [f32; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -984,6 +1010,8 @@ pub trait Simd: fn as_array_ref_i8x32(self, a: &i8x32) -> &[i8; 32usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i8x32(self, a: &mut i8x32) -> &mut [i8; 32usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the 
equivalent byte length."] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1056,6 +1084,8 @@ pub trait Simd: fn as_array_ref_u8x32(self, a: &u8x32) -> &[u8; 32usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u8x32(self, a: &mut u8x32) -> &mut [u8; 32usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1126,6 +1156,8 @@ pub trait Simd: fn as_array_ref_mask8x32(self, a: &mask8x32) -> &[i8; 32usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask8x32(self, a: &mut mask8x32) -> &mut [i8; 32usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1171,6 +1203,8 @@ pub trait Simd: fn as_array_ref_i16x16(self, a: &i16x16) -> &[i16; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i16x16(self, a: &mut i16x16) -> &mut [i16; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1243,6 +1277,8 @@ pub trait Simd: fn as_array_ref_u16x16(self, a: &u16x16) -> &[u16; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u16x16(self, a: &mut u16x16) -> &mut [u16; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1315,6 +1351,8 @@ pub trait Simd: fn as_array_ref_mask16x16(self, a: &mask16x16) -> &[i16; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask16x16(self, a: &mut mask16x16) -> &mut [i16; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1360,6 +1398,8 @@ pub 
trait Simd: fn as_array_ref_i32x8(self, a: &i32x8) -> &[i32; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i32x8(self, a: &mut i32x8) -> &mut [i32; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1434,6 +1474,8 @@ pub trait Simd: fn as_array_ref_u32x8(self, a: &u32x8) -> &[u32; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u32x8(self, a: &mut u32x8) -> &mut [u32; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1504,6 +1546,8 @@ pub trait Simd: fn as_array_ref_mask32x8(self, a: &mask32x8) -> &[i32; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask32x8(self, a: &mut mask32x8) -> &mut [i32; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1549,6 +1593,8 @@ pub trait Simd: fn as_array_ref_f64x4(self, a: &f64x4) -> &[f64; 4usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_f64x4(self, a: &mut f64x4) -> &mut [f64; 4usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1629,6 +1675,8 @@ pub trait Simd: fn as_array_ref_mask64x4(self, a: &mask64x4) -> &[i64; 4usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask64x4(self, a: &mut mask64x4) -> &mut [i64; 4usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1674,6 +1722,8 @@ pub trait Simd: fn as_array_ref_f32x16(self, a: &f32x16) -> &[f32; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_f32x16(self, a: &mut f32x16) -> &mut 
[f32; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_f32x16(self, a: u8x64) -> f32x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1770,6 +1820,8 @@ pub trait Simd: fn as_array_ref_i8x64(self, a: &i8x64) -> &[i8; 64usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i8x64(self, a: &mut i8x64) -> &mut [i8; 64usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i8x64(self, a: u8x64) -> i8x64; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1840,6 +1892,8 @@ pub trait Simd: fn as_array_ref_u8x64(self, a: &u8x64) -> &[u8; 64usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u8x64(self, a: &mut u8x64) -> &mut [u8; 64usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u8x64(self, a: u8x64) -> u8x64; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1910,6 +1964,8 @@ pub trait Simd: fn as_array_ref_mask8x64(self, a: &mask8x64) -> &[i8; 64usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask8x64(self, a: &mut mask8x64) -> &mut [i8; 64usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -1953,6 +2009,8 @@ pub trait Simd: fn as_array_ref_i16x32(self, a: &i16x32) -> &[i16; 32usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i16x32(self, a: &mut i16x32) -> &mut [i16; 32usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i16x32(self, a: u8x64) -> i16x32; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -2023,6 +2081,8 @@ pub trait Simd: fn as_array_ref_u16x32(self, a: &u16x32) -> &[u16; 32usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u16x32(self, a: &mut u16x32) -> &mut [u16; 32usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, 
with the equivalent byte length."] fn cvt_from_bytes_u16x32(self, a: u8x64) -> u16x32; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -2097,6 +2157,8 @@ pub trait Simd: fn as_array_ref_mask16x32(self, a: &mask16x32) -> &[i16; 32usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask16x32(self, a: &mut mask16x32) -> &mut [i16; 32usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -2140,6 +2202,8 @@ pub trait Simd: fn as_array_ref_i32x16(self, a: &i32x16) -> &[i32; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_i32x16(self, a: &mut i32x16) -> &mut [i32; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_i32x16(self, a: u8x64) -> i32x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -2212,6 +2276,8 @@ pub trait Simd: fn as_array_ref_u32x16(self, a: &u32x16) -> &[u32; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_u32x16(self, a: &mut u32x16) -> &mut [u32; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_u32x16(self, a: u8x64) -> u32x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -2284,6 +2350,8 @@ pub trait Simd: fn as_array_ref_mask32x16(self, a: &mask32x16) -> &[i32; 16usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask32x16(self, a: &mut mask32x16) -> &mut [i32; 16usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ -2327,6 +2395,8 @@ pub trait Simd: fn as_array_ref_f64x8(self, a: &f64x8) -> &[f64; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_f64x8(self, a: &mut f64x8) -> &mut [f64; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_f64x8(self, a: u8x64) -> f64x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] @@ 
-2405,6 +2475,8 @@ pub trait Simd: fn as_array_ref_mask64x8(self, a: &mask64x8) -> &[i64; 8usize]; #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] fn as_array_mut_mask64x8(self, a: &mut mask64x8) -> &mut [i64; 8usize]; + #[doc = "Store a SIMD vector into an array of the same length."] + fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> (); #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8; #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index 58843ad5..a9f7c1ad 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -133,6 +133,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128, &mut [f32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { + unsafe { _mm_storeu_ps(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { unsafe { f32x4 { @@ -399,6 +403,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { unsafe { i8x16 { @@ -593,6 +601,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [u8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { unsafe { u8x16 { @@ -798,6 +810,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { unsafe { mask8x16 { @@ -898,6 +914,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { unsafe { i16x8 { @@ -1067,6 +1087,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [u16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { unsafe { u16x8 { @@ -1242,6 +1266,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { 
unsafe { mask16x8 { @@ -1342,6 +1370,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { unsafe { i32x4 { @@ -1513,6 +1545,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [u32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { unsafe { u32x4 { @@ -1696,6 +1732,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { unsafe { mask32x4 { @@ -1796,6 +1836,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128d, &mut [f64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { + unsafe { _mm_storeu_pd(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { unsafe { f64x2 { @@ -1991,6 +2035,10 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut __m128i, &mut [i64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { + unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { unsafe { mask64x2 { @@ -2102,6 +2150,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128; 2usize], &mut [f32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { + unsafe { + _mm_storeu_ps(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_ps(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8 { unsafe { f32x8 { @@ -2420,6 +2475,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32 { unsafe { i8x32 { @@ -2653,6 +2715,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [u8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32 { unsafe { u8x32 { @@ -2881,6 +2950,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i8; 32usize]>(&mut a.val.0) } } 
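
Note: on SSE4.2, vectors wider than 128 bits are represented as arrays of `__m128`/`__m128i` blocks, so the generated `store_array` methods above emit one unaligned store per block. The offsets passed to `.add(..)` are counted in scalar elements, not bytes, because `dest.as_mut_ptr()` is already typed. A minimal hand-written sketch of the same two-block pattern (the helper name `store_f32x8_two_blocks` is illustrative, not crate API):

    #[cfg(target_arch = "x86_64")]
    unsafe fn store_f32x8_two_blocks(blocks: [core::arch::x86_64::__m128; 2], dest: &mut [f32; 8]) {
        use core::arch::x86_64::_mm_storeu_ps;
        // `dest.as_mut_ptr()` is `*mut f32`, so `.add(4)` advances by four
        // f32 elements (16 bytes), i.e. exactly one 128-bit block.
        _mm_storeu_ps(dest.as_mut_ptr(), blocks[0]);
        _mm_storeu_ps(dest.as_mut_ptr().add(4), blocks[1]);
    }
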
#[inline(always)] + fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { unsafe { mask8x32 { @@ -3024,6 +3100,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16 { unsafe { i16x16 { @@ -3257,6 +3340,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [u16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16 { unsafe { u16x16 { @@ -3496,6 +3586,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { unsafe { mask16x16 { @@ -3639,6 +3736,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8 { unsafe { i32x8 { @@ -3877,6 +3981,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [u32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8 { unsafe { u32x8 { @@ -4102,6 +4213,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { unsafe { mask32x8 { @@ -4245,6 +4363,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128d; 2usize], &mut [f64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { + unsafe 
{ + _mm_storeu_pd(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_pd(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4 { unsafe { f64x4 { @@ -4516,6 +4641,13 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { unsafe { mask64x4 { @@ -4663,6 +4795,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128; 4usize], &mut [f32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { + unsafe { + _mm_storeu_ps(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_ps(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + _mm_storeu_ps(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + _mm_storeu_ps(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_f32x16(self, a: u8x64) -> f32x16 { unsafe { f32x16 { @@ -5023,6 +5164,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_i8x64(self, a: u8x64) -> i8x64 { unsafe { i8x64 { @@ -5253,6 +5403,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [u8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_u8x64(self, a: u8x64) -> u8x64 { unsafe { u8x64 { @@ -5528,6 +5687,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { unsafe { mask8x64 { @@ -5668,6 +5836,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { + 
_mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_i16x32(self, a: u8x64) -> i16x32 { unsafe { i16x32 { @@ -5907,6 +6084,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [u16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_u16x32(self, a: u8x64) -> u16x32 { unsafe { u16x32 { @@ -6201,6 +6387,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { unsafe { mask16x32 { @@ -6344,6 +6539,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_i32x16(self, a: u8x64) -> i32x16 { unsafe { i32x16 { @@ -6579,6 +6783,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [u32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_u32x16(self, a: u8x64) -> u32x16 { unsafe { u32x16 { @@ -6846,6 +7059,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn 
cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { unsafe { mask32x16 { @@ -6986,6 +7208,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128d; 4usize], &mut [f64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { + unsafe { + _mm_storeu_pd(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_pd(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + _mm_storeu_pd(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); + _mm_storeu_pd(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_f64x8(self, a: u8x64) -> f64x8 { unsafe { f64x8 { @@ -7254,6 +7485,15 @@ impl Simd for Sse4_2 { unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { + unsafe { + _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + _mm_storeu_si128(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); + _mm_storeu_si128(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { unsafe { mask64x8 { diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index 2c76d0fc..59c94768 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -115,6 +115,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [f32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { unsafe { f32x4 { @@ -374,6 +378,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { unsafe { i8x16 { @@ -556,6 +564,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [u8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { unsafe { u8x16 { @@ -736,6 +748,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [i8; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { unsafe { mask8x16 { @@ -843,6 +859,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [i16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { unsafe { i16x8 { @@ -1009,6 +1029,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [u16; 8usize]>(&mut 
a.val.0) } } #[inline(always)] + fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { unsafe { u16x8 { @@ -1171,6 +1195,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [i16; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { unsafe { mask16x8 { @@ -1278,6 +1306,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { unsafe { i32x4 { @@ -1448,6 +1480,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [u32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { unsafe { u32x4 { @@ -1610,6 +1646,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [i32; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { unsafe { mask32x4 { @@ -1717,6 +1757,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [f64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { unsafe { f64x2 { @@ -1934,6 +1978,10 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut v128, &mut [i64; 2usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { + unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + } + #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { unsafe { mask64x2 { @@ -2052,6 +2100,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [f32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8 { unsafe { f32x8 { @@ -2370,6 +2425,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32 { unsafe { i8x32 { @@ -2603,6 +2665,13 @@ impl Simd for 
WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [u8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32 { unsafe { u8x32 { @@ -2831,6 +2900,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i8; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { unsafe { mask8x32 { @@ -2974,6 +3050,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16 { unsafe { i16x16 { @@ -3207,6 +3290,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [u16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16 { unsafe { u16x16 { @@ -3444,6 +3534,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i16; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { unsafe { mask16x16 { @@ -3587,6 +3684,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8 { unsafe { i32x8 { @@ -3825,6 +3929,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [u32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8 { unsafe { u32x8 { @@ -4050,6 +4161,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i32; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x8(self, a: mask32x8, dest: &mut 
[i32; 8usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { unsafe { mask32x8 { @@ -4193,6 +4311,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [f64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4 { unsafe { f64x4 { @@ -4464,6 +4589,13 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i64; 4usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + } + } + #[inline(always)] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { unsafe { mask64x4 { @@ -4611,6 +4743,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [f32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_f32x16(self, a: u8x64) -> f32x16 { unsafe { f32x16 { @@ -4968,6 +5109,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_i8x64(self, a: u8x64) -> i8x64 { unsafe { i8x64 { @@ -5198,6 +5348,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [u8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_u8x64(self, a: u8x64) -> u8x64 { unsafe { u8x64 { @@ -5484,6 +5643,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i8; 64usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); + 
v128_store(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { unsafe { mask8x64 { @@ -5624,6 +5792,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_i16x32(self, a: u8x64) -> i16x32 { unsafe { i16x32 { @@ -5863,6 +6040,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [u16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_u16x32(self, a: u8x64) -> u16x32 { unsafe { u16x32 { @@ -6144,6 +6330,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i16; 32usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { unsafe { mask16x32 { @@ -6287,6 +6482,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_i32x16(self, a: u8x64) -> i32x16 { unsafe { i32x16 { @@ -6522,6 +6726,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [u32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_u32x16(self, a: u8x64) -> u32x16 { unsafe { u32x16 { @@ -6786,6 +6999,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i32; 16usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { + unsafe { + 
v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { unsafe { mask32x16 { @@ -6926,6 +7148,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [f64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_f64x8(self, a: u8x64) -> f64x8 { unsafe { f64x8 { @@ -7194,6 +7425,15 @@ impl Simd for WasmSimd128 { unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i64; 8usize]>(&mut a.val.0) } } #[inline(always)] + fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { + unsafe { + v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); + v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); + v128_store(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + } + } + #[inline(always)] fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { unsafe { mask64x8 { diff --git a/fearless_simd_gen/src/arch/neon.rs b/fearless_simd_gen/src/arch/neon.rs index c5626278..081a3242 100644 --- a/fearless_simd_gen/src/arch/neon.rs +++ b/fearless_simd_gen/src/arch/neon.rs @@ -98,7 +98,7 @@ pub(crate) fn simple_intrinsic(name: &str, ty: &VecType) -> Ident { ) } -pub(crate) fn load_intrinsic(ty: &VecType) -> Ident { +fn memory_intrinsic(op: &str, ty: &VecType) -> Ident { let (opt_q, scalar_c, size) = neon_array_type(ty); let num_blocks = ty.n_bits() / 128; let opt_count = if num_blocks > 1 { @@ -107,11 +107,19 @@ pub(crate) fn load_intrinsic(ty: &VecType) -> Ident { String::new() }; Ident::new( - &format!("vld1{opt_q}_{scalar_c}{size}{opt_count}"), + &format!("{op}1{opt_q}_{scalar_c}{size}{opt_count}"), Span::call_site(), ) } +pub(crate) fn load_intrinsic(ty: &VecType) -> Ident { + memory_intrinsic("vld", ty) +} + +pub(crate) fn store_intrinsic(ty: &VecType) -> Ident { + memory_intrinsic("vst", ty) +} + pub(crate) fn split_intrinsic(name: &str, name2: &str, ty: &VecType) -> Ident { let (opt_q, scalar_c, size) = neon_array_type(ty); Ident::new( diff --git a/fearless_simd_gen/src/generic.rs b/fearless_simd_gen/src/generic.rs index 1900e171..a92230c4 100644 --- a/fearless_simd_gen/src/generic.rs +++ b/fearless_simd_gen/src/generic.rs @@ -198,7 +198,8 @@ pub(crate) fn generic_op(op: &Op, ty: &VecType) -> TokenStream { OpSig::Split { .. } | OpSig::Combine { .. } | OpSig::AsArray { .. } - | OpSig::FromArray { .. } => { + | OpSig::FromArray { .. 
} + | OpSig::ToArray => { panic!("These operations require more information about the target platform"); } OpSig::FromBytes => generic_from_bytes(method_sig, ty), @@ -332,6 +333,49 @@ pub(crate) fn generic_as_array( } } +pub(crate) fn generic_to_array( + method_sig: TokenStream, + vec_ty: &VecType, + max_block_size: usize, + store_unaligned_block: impl Fn(&VecType) -> Ident, +) -> TokenStream { + let block_size = max_block_size.min(vec_ty.n_bits()); + let block_count = vec_ty.n_bits() / block_size; + let num_scalars_per_block = vec_ty.len / block_count; + + let native_block_ty = VecType::new( + vec_ty.scalar, + vec_ty.scalar_bits, + block_size / vec_ty.scalar_bits, + ); + + let store_unaligned = store_unaligned_block(&native_block_ty); + let store_expr = if block_count == 1 { + quote! { + unsafe { #store_unaligned(dest.as_mut_ptr() as *mut _, a.val.0) } + } + } else { + let blocks = (0..block_count).map(|n| { + let offset = n * num_scalars_per_block; + let block_idx = proc_macro2::Literal::usize_unsuffixed(n); + quote! { + #store_unaligned(dest.as_mut_ptr().add(#offset) as *mut _, a.val.0[#block_idx]) + } + }); + quote! { + unsafe { + #(#blocks;)* + } + } + }; + + quote! { + #method_sig { + #store_expr + } + } +} + pub(crate) fn generic_to_bytes(method_sig: TokenStream, vec_ty: &VecType) -> TokenStream { let bytes_ty = vec_ty.reinterpret(ScalarType::Unsigned, 8).rust(); quote! { diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index cd253243..d1d65cae 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -488,6 +488,13 @@ impl Level for Fallback { } } } + OpSig::ToArray => { + quote! { + #method_sig { + *dest = a.val.0; + } + } + } OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index 2e4af67a..dc2b56b2 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -4,9 +4,10 @@ use proc_macro2::{Ident, Literal, Span, TokenStream}; use quote::{ToTokens as _, format_ident, quote}; -use crate::arch::neon::load_intrinsic; +use crate::arch::neon::{load_intrinsic, store_intrinsic}; use crate::generic::{ - generic_as_array, generic_from_array, generic_from_bytes, generic_op_name, generic_to_bytes, + generic_as_array, generic_from_array, generic_from_bytes, generic_op_name, generic_to_array, + generic_to_bytes, }; use crate::level::Level; use crate::ops::{Op, valid_reinterpret}; @@ -473,6 +474,9 @@ impl Level for Neon { self.arch_ty(vec_ty) }) } + OpSig::ToArray => { + generic_to_array(method_sig, vec_ty, self.max_block_size(), store_intrinsic) + } OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 61da3d70..f3ae8ac7 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -7,7 +7,7 @@ use quote::{format_ident, quote}; use crate::arch::wasm::{arch_prefix, v128_intrinsic}; use crate::generic::{ generic_as_array, generic_block_combine, generic_block_split, generic_from_array, - generic_from_bytes, generic_op_name, generic_to_bytes, scalar_binary, + generic_from_bytes, generic_op_name, generic_to_array, generic_to_bytes, scalar_binary, }; use crate::level::Level; use crate::ops::{Op, Quantifier, valid_reinterpret}; @@ -623,6 +623,9 @@ impl Level for WasmSimd128 { 
Ident::new("v128", Span::call_site()) }) } + OpSig::ToArray => generic_to_array(method_sig, vec_ty, self.max_block_size(), |_| { + v128_intrinsic("store") + }), OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index 15973f6e..6f11b538 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -8,7 +8,7 @@ use crate::arch::x86::{ }; use crate::generic::{ generic_as_array, generic_block_combine, generic_block_split, generic_from_array, - generic_from_bytes, generic_op_name, generic_to_bytes, scalar_binary, + generic_from_bytes, generic_op_name, generic_to_array, generic_to_bytes, scalar_binary, }; use crate::level::Level; use crate::ops::{Op, OpSig, Quantifier, valid_reinterpret}; @@ -182,6 +182,9 @@ impl Level for X86 { self.arch_ty(vec_ty) }) } + OpSig::ToArray => generic_to_array(method_sig, vec_ty, self.max_block_size(), |block_ty| { + intrinsic_ident("storeu", coarse_type(block_ty), block_ty.n_bits()) + }), OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/ops.rs b/fearless_simd_gen/src/ops.rs index a7462f47..ab779614 100644 --- a/fearless_simd_gen/src/ops.rs +++ b/fearless_simd_gen/src/ops.rs @@ -106,6 +106,8 @@ pub(crate) enum OpSig { /// Takes a single argument of the vector type, or a reference to it, and returns the corresponding array type (e.g. /// `[f32; 4]` for `f32x4`) or a reference to it. AsArray { kind: RefKind }, + /// Takes a vector and a mutable reference to an array, and stores the vector elements into the array. + ToArray, /// Takes a single argument of the vector type, and returns a vector type with `u8` elements and the same bit width. FromBytes, /// Takes a single argument of a vector type with `u8` elements, and returns a vector type with different elements @@ -271,6 +273,14 @@ impl Op { let array_ty = quote! { [#rust_scalar; #len] }; quote! { (self, #arg0: #ref_tok #ty) -> #ref_tok #array_ty } } + OpSig::ToArray => { + let arg0 = &arg_names[0]; + let arg1 = &arg_names[1]; + let rust_scalar = vec_ty.scalar.rust(vec_ty.scalar_bits); + let len = vec_ty.len; + let array_ty = quote! { [#rust_scalar; #len] }; + quote! { (self, #arg0: #ty, #arg1: &mut #array_ty) -> () } + } OpSig::FromBytes => { let arg0 = &arg_names[0]; let bytes_ty = vec_ty.reinterpret(ScalarType::Unsigned, 8).rust(); @@ -297,7 +307,7 @@ impl Op { .collect::>(); let method_ident = Ident::new(self.method, Span::call_site()); let sig_inner = match &self.sig { - OpSig::Splat | OpSig::LoadInterleaved { .. } | OpSig::StoreInterleaved { .. } => { + OpSig::Splat | OpSig::LoadInterleaved { .. } | OpSig::StoreInterleaved { .. } | OpSig::ToArray => { return None; } OpSig::Unary @@ -434,6 +444,12 @@ const BASE_OPS: &[Op] = &[ OpSig::AsArray { kind: RefKind::Mut }, "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array.", ), + Op::new( + "store_array", + OpKind::AssociatedOnly, + OpSig::ToArray, + "Store a SIMD vector into an array of the same length.", + ), Op::new( "cvt_from_bytes", OpKind::OwnTrait, @@ -1282,6 +1298,7 @@ impl OpSig { | Self::StoreInterleaved { .. } | Self::FromArray { .. } | Self::AsArray { .. } + | Self::ToArray ) { return false; } @@ -1314,7 +1331,7 @@ impl OpSig { Self::Ternary | Self::Select => &["a", "b", "c"], Self::Shift => &["a", "shift"], Self::LoadInterleaved { .. 
} => &["src"], - Self::StoreInterleaved { .. } => &["a", "dest"], + Self::StoreInterleaved { .. } | Self::ToArray => &["a", "dest"], } } fn vec_trait_arg_names(&self) -> &'static [&'static str] { @@ -1323,7 +1340,8 @@ impl OpSig { | Self::LoadInterleaved { .. } | Self::StoreInterleaved { .. } | Self::FromArray { .. } - | Self::FromBytes { .. } => &[], + | Self::FromBytes { .. } + | Self::ToArray => &[], Self::Unary | Self::Cvt { .. } | Self::Reinterpret { .. } @@ -1377,6 +1395,7 @@ impl OpSig { | Self::StoreInterleaved { .. } | Self::FromArray { .. } | Self::AsArray { .. } + | Self::ToArray | Self::FromBytes | Self::ToBytes => return None, }; diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 233a16be..0f56649a 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -2984,3 +2984,206 @@ fn index_consistency(simd: S) { assert_eq!(i, *v.index_mut(i) as usize); } } +#[simd_test] +fn store_array_f32x4(simd: S) { + let a = f32x4::from_slice(simd, &[1.0, 2.0, 3.0, 4.0]); + let mut dest = [0.0_f32; 4]; + simd.store_array_f32x4(a, &mut dest); + assert_eq!(dest, [1.0, 2.0, 3.0, 4.0]); +} + +#[simd_test] +fn store_array_f32x8(simd: S) { + let a = f32x8::from_slice(simd, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + let mut dest = [0.0_f32; 8]; + simd.store_array_f32x8(a, &mut dest); + assert_eq!(dest, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); +} + +#[simd_test] +fn store_array_f64x2(simd: S) { + let a = f64x2::from_slice(simd, &[1.5, 2.5]); + let mut dest = [0.0_f64; 2]; + simd.store_array_f64x2(a, &mut dest); + assert_eq!(dest, [1.5, 2.5]); +} + +#[simd_test] +fn store_array_f64x4(simd: S) { + let a = f64x4::from_slice(simd, &[1.5, 2.5, 3.5, 4.5]); + let mut dest = [0.0_f64; 4]; + simd.store_array_f64x4(a, &mut dest); + assert_eq!(dest, [1.5, 2.5, 3.5, 4.5]); +} + +#[simd_test] +fn store_array_i8x16(simd: S) { + let a = i8x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let mut dest = [0_i8; 16]; + simd.store_array_i8x16(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); +} + +#[simd_test] +fn store_array_i16x8(simd: S) { + let a = i16x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); + let mut dest = [0_i16; 8]; + simd.store_array_i16x8(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4, 5, 6, 7, 8]); +} + +#[simd_test] +fn store_array_i32x4(simd: S) { + let a = i32x4::from_slice(simd, &[1, 2, 3, 4]); + let mut dest = [0_i32; 4]; + simd.store_array_i32x4(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4]); +} + +#[simd_test] +fn store_array_u8x16(simd: S) { + let a = u8x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let mut dest = [0_u8; 16]; + simd.store_array_u8x16(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); +} + +#[simd_test] +fn store_array_u16x8(simd: S) { + let a = u16x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); + let mut dest = [0_u16; 8]; + simd.store_array_u16x8(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4, 5, 6, 7, 8]); +} + +#[simd_test] +fn store_array_u32x4(simd: S) { + let a = u32x4::from_slice(simd, &[1, 2, 3, 4]); + let mut dest = [0_u32; 4]; + simd.store_array_u32x4(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4]); +} + +#[simd_test] +fn store_array_i32x8(simd: S) { + let a = i32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); + let mut dest = [0_i32; 8]; + simd.store_array_i32x8(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4, 5, 6, 
7, 8]); +} + +#[simd_test] +fn store_array_u32x8(simd: S) { + let a = u32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); + let mut dest = [0_u32; 8]; + simd.store_array_u32x8(a, &mut dest); + assert_eq!(dest, [1, 2, 3, 4, 5, 6, 7, 8]); +} + +#[simd_test] +fn store_array_f32x16(simd: S) { + let data: [f32; 16] = core::array::from_fn(|i| (i + 1) as f32); + let a = f32x16::from_slice(simd, &data); + let mut dest = [0.0_f32; 16]; + simd.store_array_f32x16(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_f64x8(simd: S) { + let data: [f64; 8] = core::array::from_fn(|i| (i + 1) as f64); + let a = f64x8::from_slice(simd, &data); + let mut dest = [0.0_f64; 8]; + simd.store_array_f64x8(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_i8x32(simd: S) { + let data: [i8; 32] = core::array::from_fn(|i| (i + 1) as i8); + let a = i8x32::from_slice(simd, &data); + let mut dest = [0_i8; 32]; + simd.store_array_i8x32(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_i8x64(simd: S) { + let data: [i8; 64] = core::array::from_fn(|i| (i + 1) as i8); + let a = i8x64::from_slice(simd, &data); + let mut dest = [0_i8; 64]; + simd.store_array_i8x64(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_i16x16(simd: S) { + let data: [i16; 16] = core::array::from_fn(|i| (i + 1) as i16); + let a = i16x16::from_slice(simd, &data); + let mut dest = [0_i16; 16]; + simd.store_array_i16x16(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_i16x32(simd: S) { + let data: [i16; 32] = core::array::from_fn(|i| (i + 1) as i16); + let a = i16x32::from_slice(simd, &data); + let mut dest = [0_i16; 32]; + simd.store_array_i16x32(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_i32x16(simd: S) { + let data: [i32; 16] = core::array::from_fn(|i| (i + 1) as i32); + let a = i32x16::from_slice(simd, &data); + let mut dest = [0_i32; 16]; + simd.store_array_i32x16(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_u8x32(simd: S) { + let data: [u8; 32] = core::array::from_fn(|i| (i + 1) as u8); + let a = u8x32::from_slice(simd, &data); + let mut dest = [0_u8; 32]; + simd.store_array_u8x32(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_u8x64(simd: S) { + let data: [u8; 64] = core::array::from_fn(|i| (i + 1) as u8); + let a = u8x64::from_slice(simd, &data); + let mut dest = [0_u8; 64]; + simd.store_array_u8x64(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_u16x16(simd: S) { + let data: [u16; 16] = core::array::from_fn(|i| (i + 1) as u16); + let a = u16x16::from_slice(simd, &data); + let mut dest = [0_u16; 16]; + simd.store_array_u16x16(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_u16x32(simd: S) { + let data: [u16; 32] = core::array::from_fn(|i| (i + 1) as u16); + let a = u16x32::from_slice(simd, &data); + let mut dest = [0_u16; 32]; + simd.store_array_u16x32(a, &mut dest); + assert_eq!(dest, data); +} + +#[simd_test] +fn store_array_u32x16(simd: S) { + let data: [u32; 16] = core::array::from_fn(|i| (i + 1) as u32); + let a = u32x16::from_slice(simd, &data); + let mut dest = [0_u32; 16]; + simd.store_array_u32x16(a, &mut dest); + assert_eq!(dest, data); +} From 660e93237d62cc4e268c5add5a1bc512597d73b3 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sat, 3 Jan 2026 10:17:07 +0100 Subject: [PATCH 2/9] Add the `store_slice` method --- fearless_simd/src/generated/simd_trait.rs | 4 + 
fearless_simd/src/generated/simd_types.rs | 180 ++++++++++++++++++++++ fearless_simd_gen/src/mk_simd_trait.rs | 4 + fearless_simd_gen/src/mk_simd_types.rs | 6 + fearless_simd_tests/tests/harness/mod.rs | 8 + 5 files changed, 202 insertions(+) diff --git a/fearless_simd/src/generated/simd_trait.rs b/fearless_simd/src/generated/simd_trait.rs index 73ea9aa0..b0732adf 100644 --- a/fearless_simd/src/generated/simd_trait.rs +++ b/fearless_simd/src/generated/simd_trait.rs @@ -2595,6 +2595,10 @@ pub trait SimdBase: #[doc = r""] #[doc = r" The slice must be the proper width."] fn from_slice(simd: S, slice: &[Self::Element]) -> Self; + #[doc = r" Store a SIMD vector into a slice."] + #[doc = r""] + #[doc = r" The slice must be the proper width."] + fn store_slice(&self, slice: &mut [Self::Element]); #[doc = r" Create a SIMD vector with all elements set to the given value."] fn splat(simd: S, val: Self::Element) -> Self; #[doc = r" Create a SIMD vector from a 128-bit vector of the same scalar"] diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index e9f37172..21e6c460 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -100,6 +100,11 @@ impl SimdBase for f32x4 { simd.load_array_f32x4(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [f32]) { + self.simd + .store_array_f32x4(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: f32) -> Self { simd.splat_f32x4(val) } @@ -325,6 +330,11 @@ impl SimdBase for i8x16 { simd.load_array_i8x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i8]) { + self.simd + .store_array_i8x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i8) -> Self { simd.splat_i8x16(val) } @@ -486,6 +496,11 @@ impl SimdBase for u8x16 { simd.load_array_u8x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u8]) { + self.simd + .store_array_u8x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u8) -> Self { simd.splat_u8x16(val) } @@ -652,6 +667,11 @@ impl SimdBase for mask8x16 { simd.load_array_mask8x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i8]) { + self.simd + .store_array_mask8x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i8) -> Self { simd.splat_mask8x16(val) } @@ -789,6 +809,11 @@ impl SimdBase for i16x8 { simd.load_array_i16x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i16]) { + self.simd + .store_array_i16x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i16) -> Self { simd.splat_i16x8(val) } @@ -950,6 +975,11 @@ impl SimdBase for u16x8 { simd.load_array_u16x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u16]) { + self.simd + .store_array_u16x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u16) -> Self { simd.splat_u16x8(val) } @@ -1116,6 +1146,11 @@ impl SimdBase for mask16x8 { simd.load_array_mask16x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i16]) { + self.simd + .store_array_mask16x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i16) -> Self { simd.splat_mask16x8(val) } @@ -1253,6 +1288,11 @@ impl SimdBase for i32x4 { simd.load_array_i32x4(slice.try_into().unwrap()) } 
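
Note: `store_slice` is the safe, length-checked counterpart to `store_array`: as the implementations below show, it converts the incoming slice to a fixed-size array reference with `try_into()` and unwraps, so a slice of the wrong length panics rather than writing out of bounds. A hypothetical usage sketch, assuming `fearless_simd::{Simd, SimdBase, f32x4}` are in scope:

    fn broadcast_first<S: Simd>(simd: S, buf: &mut [f32; 8]) {
        // Fill the buffer with its first element, four lanes at a time.
        let v = f32x4::splat(simd, buf[0]);
        for chunk in buf.chunks_exact_mut(4) {
            // Each chunk is exactly four elements long, so the
            // `try_into()` inside `store_slice` cannot fail here.
            v.store_slice(chunk);
        }
    }
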
#[inline(always)] + fn store_slice(&self, slice: &mut [i32]) { + self.simd + .store_array_i32x4(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i32) -> Self { simd.splat_i32x4(val) } @@ -1426,6 +1466,11 @@ impl SimdBase for u32x4 { simd.load_array_u32x4(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u32]) { + self.simd + .store_array_u32x4(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u32) -> Self { simd.splat_u32x4(val) } @@ -1604,6 +1649,11 @@ impl SimdBase for mask32x4 { simd.load_array_mask32x4(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i32]) { + self.simd + .store_array_mask32x4(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i32) -> Self { simd.splat_mask32x4(val) } @@ -1741,6 +1791,11 @@ impl SimdBase for f64x2 { simd.load_array_f64x2(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [f64]) { + self.simd + .store_array_f64x2(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: f64) -> Self { simd.splat_f64x2(val) } @@ -1957,6 +2012,11 @@ impl SimdBase for mask64x2 { simd.load_array_mask64x2(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i64]) { + self.simd + .store_array_mask64x2(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i64) -> Self { simd.splat_mask64x2(val) } @@ -2094,6 +2154,11 @@ impl SimdBase for f32x8 { simd.load_array_f32x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [f32]) { + self.simd + .store_array_f32x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: f32) -> Self { simd.splat_f32x8(val) } @@ -2326,6 +2391,11 @@ impl SimdBase for i8x32 { simd.load_array_i8x32(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i8]) { + self.simd + .store_array_i8x32(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i8) -> Self { simd.splat_i8x32(val) } @@ -2494,6 +2564,11 @@ impl SimdBase for u8x32 { simd.load_array_u8x32(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u8]) { + self.simd + .store_array_u8x32(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u8) -> Self { simd.splat_u8x32(val) } @@ -2667,6 +2742,11 @@ impl SimdBase for mask8x32 { simd.load_array_mask8x32(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i8]) { + self.simd + .store_array_mask8x32(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i8) -> Self { simd.splat_mask8x32(val) } @@ -2816,6 +2896,11 @@ impl SimdBase for i16x16 { simd.load_array_i16x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i16]) { + self.simd + .store_array_i16x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i16) -> Self { simd.splat_i16x16(val) } @@ -2989,6 +3074,11 @@ impl SimdBase for u16x16 { simd.load_array_u16x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u16]) { + self.simd + .store_array_u16x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u16) -> Self { simd.splat_u16x16(val) } @@ -3162,6 +3252,11 @@ impl SimdBase for mask16x16 { simd.load_array_mask16x16(slice.try_into().unwrap()) } #[inline(always)] + fn 
store_slice(&self, slice: &mut [i16]) { + self.simd + .store_array_mask16x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i16) -> Self { simd.splat_mask16x16(val) } @@ -3306,6 +3401,11 @@ impl SimdBase for i32x8 { simd.load_array_i32x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i32]) { + self.simd + .store_array_i32x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i32) -> Self { simd.splat_i32x8(val) } @@ -3486,6 +3586,11 @@ impl SimdBase for u32x8 { simd.load_array_u32x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u32]) { + self.simd + .store_array_u32x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u32) -> Self { simd.splat_u32x8(val) } @@ -3671,6 +3776,11 @@ impl SimdBase for mask32x8 { simd.load_array_mask32x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i32]) { + self.simd + .store_array_mask32x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i32) -> Self { simd.splat_mask32x8(val) } @@ -3815,6 +3925,11 @@ impl SimdBase for f64x4 { simd.load_array_f64x4(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [f64]) { + self.simd + .store_array_f64x4(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: f64) -> Self { simd.splat_f64x4(val) } @@ -4038,6 +4153,11 @@ impl SimdBase for mask64x4 { simd.load_array_mask64x4(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i64]) { + self.simd + .store_array_mask64x4(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i64) -> Self { simd.splat_mask64x4(val) } @@ -4187,6 +4307,11 @@ impl SimdBase for f32x16 { simd.load_array_f32x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [f32]) { + self.simd + .store_array_f32x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: f32) -> Self { simd.splat_f32x16(val) } @@ -4413,6 +4538,11 @@ impl SimdBase for i8x64 { simd.load_array_i8x64(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i8]) { + self.simd + .store_array_i8x64(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i8) -> Self { simd.splat_i8x64(val) } @@ -4575,6 +4705,11 @@ impl SimdBase for u8x64 { simd.load_array_u8x64(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u8]) { + self.simd + .store_array_u8x64(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u8) -> Self { simd.splat_u8x64(val) } @@ -4742,6 +4877,11 @@ impl SimdBase for mask8x64 { simd.load_array_mask8x64(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i8]) { + self.simd + .store_array_mask8x64(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i8) -> Self { simd.splat_mask8x64(val) } @@ -4885,6 +5025,11 @@ impl SimdBase for i16x32 { simd.load_array_i16x32(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i16]) { + self.simd + .store_array_i16x32(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i16) -> Self { simd.splat_i16x32(val) } @@ -5052,6 +5197,11 @@ impl SimdBase for u16x32 { simd.load_array_u16x32(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut 
[u16]) { + self.simd + .store_array_u16x32(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u16) -> Self { simd.splat_u16x32(val) } @@ -5219,6 +5369,11 @@ impl SimdBase for mask16x32 { simd.load_array_mask16x32(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i16]) { + self.simd + .store_array_mask16x32(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i16) -> Self { simd.splat_mask16x32(val) } @@ -5362,6 +5517,11 @@ impl SimdBase for i32x16 { simd.load_array_i32x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i32]) { + self.simd + .store_array_i32x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i32) -> Self { simd.splat_i32x16(val) } @@ -5541,6 +5701,11 @@ impl SimdBase for u32x16 { simd.load_array_u32x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [u32]) { + self.simd + .store_array_u32x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: u32) -> Self { simd.splat_u32x16(val) } @@ -5720,6 +5885,11 @@ impl SimdBase for mask32x16 { simd.load_array_mask32x16(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i32]) { + self.simd + .store_array_mask32x16(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i32) -> Self { simd.splat_mask32x16(val) } @@ -5858,6 +6028,11 @@ impl SimdBase for f64x8 { simd.load_array_f64x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [f64]) { + self.simd + .store_array_f64x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: f64) -> Self { simd.splat_f64x8(val) } @@ -6075,6 +6250,11 @@ impl SimdBase for mask64x8 { simd.load_array_mask64x8(slice.try_into().unwrap()) } #[inline(always)] + fn store_slice(&self, slice: &mut [i64]) { + self.simd + .store_array_mask64x8(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: i64) -> Self { simd.splat_mask64x8(val) } diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index 1e9c39ad..bd916e76 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -167,6 +167,10 @@ fn mk_simd_base() -> TokenStream { /// /// The slice must be the proper width. fn from_slice(simd: S, slice: &[Self::Element]) -> Self; + /// Store a SIMD vector into a slice. + /// + /// The slice must be the proper width. + fn store_slice(&self, slice: &mut [Self::Element]); /// Create a SIMD vector with all elements set to the given value. fn splat(simd: S, val: Self::Element) -> Self; /// Create a SIMD vector from a 128-bit vector of the same scalar diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index a1be080d..46b4c386 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -289,6 +289,7 @@ fn simd_vec_impl(ty: &VecType) -> TokenStream { _ => unreachable!(), }; let from_array_op = generic_op_name("load_array", ty); + let store_array_op = generic_op_name("store_array", ty); let as_array_ref_op = generic_op_name("as_array_ref", ty); let as_array_mut_op = generic_op_name("as_array_mut", ty); quote! 
{ @@ -319,6 +320,11 @@ fn simd_vec_impl(ty: &VecType) -> TokenStream { simd.#from_array_op(slice.try_into().unwrap()) } + #[inline(always)] + fn store_slice(&self, slice: &mut [#scalar]) { + self.simd.#store_array_op(*self, slice.try_into().unwrap()) + } + #[inline(always)] fn splat(simd: S, val: #scalar) -> Self { simd.#splat(val) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 0f56649a..8bce3545 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -3187,3 +3187,11 @@ fn store_array_u32x16(simd: S) { simd.store_array_u32x16(a, &mut dest); assert_eq!(dest, data); } + +#[simd_test] +fn store_slice_f32x4(simd: S) { + let a = f32x4::from_slice(simd, &[1.0, 2.0, 3.0, 4.0]); + let mut dest = [0.0_f32; 4]; + a.store_slice(&mut dest); + assert_eq!(dest, [1.0, 2.0, 3.0, 4.0]); +} \ No newline at end of file From a1d5ad5cde1883ed07fef4ebe11b55f453cb1184 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sat, 3 Jan 2026 10:19:27 +0100 Subject: [PATCH 3/9] Reformat --- fearless_simd_gen/src/mk_x86.rs | 8 +++++--- fearless_simd_gen/src/ops.rs | 5 ++++- fearless_simd_tests/tests/harness/mod.rs | 22 +++++++++++++++++----- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index 6f11b538..7c7ed34e 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -182,9 +182,11 @@ impl Level for X86 { self.arch_ty(vec_ty) }) } - OpSig::ToArray => generic_to_array(method_sig, vec_ty, self.max_block_size(), |block_ty| { - intrinsic_ident("storeu", coarse_type(block_ty), block_ty.n_bits()) - }), + OpSig::ToArray => { + generic_to_array(method_sig, vec_ty, self.max_block_size(), |block_ty| { + intrinsic_ident("storeu", coarse_type(block_ty), block_ty.n_bits()) + }) + } OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/ops.rs b/fearless_simd_gen/src/ops.rs index ab779614..6ddeed09 100644 --- a/fearless_simd_gen/src/ops.rs +++ b/fearless_simd_gen/src/ops.rs @@ -307,7 +307,10 @@ impl Op { .collect::>(); let method_ident = Ident::new(self.method, Span::call_site()); let sig_inner = match &self.sig { - OpSig::Splat | OpSig::LoadInterleaved { .. } | OpSig::StoreInterleaved { .. } | OpSig::ToArray => { + OpSig::Splat + | OpSig::LoadInterleaved { .. } + | OpSig::StoreInterleaved { .. 
} + | OpSig::ToArray => { return None; } OpSig::Unary diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 8bce3545..a78a28a2 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -3018,10 +3018,16 @@ fn store_array_f64x4(simd: S) { #[simd_test] fn store_array_i8x16(simd: S) { - let a = i8x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let a = i8x16::from_slice( + simd, + &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ); let mut dest = [0_i8; 16]; simd.store_array_i8x16(a, &mut dest); - assert_eq!(dest, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + assert_eq!( + dest, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ); } #[simd_test] @@ -3042,10 +3048,16 @@ fn store_array_i32x4(simd: S) { #[simd_test] fn store_array_u8x16(simd: S) { - let a = u8x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let a = u8x16::from_slice( + simd, + &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ); let mut dest = [0_u8; 16]; simd.store_array_u8x16(a, &mut dest); - assert_eq!(dest, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + assert_eq!( + dest, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ); } #[simd_test] @@ -3194,4 +3206,4 @@ fn store_slice_f32x4(simd: S) { let mut dest = [0.0_f32; 4]; a.store_slice(&mut dest); assert_eq!(dest, [1.0, 2.0, 3.0, 4.0]); -} \ No newline at end of file +} From f5b28432f9092d1e24abe94882e6d8da89da00c7 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sat, 3 Jan 2026 10:36:19 +0100 Subject: [PATCH 4/9] Fix clippy --- fearless_simd/src/generated/simd_types.rs | 72 +++++++++++------------ fearless_simd_gen/src/mk_simd_types.rs | 2 +- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index 21e6c460..6768951e 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -102,7 +102,7 @@ impl SimdBase for f32x4 { #[inline(always)] fn store_slice(&self, slice: &mut [f32]) { self.simd - .store_array_f32x4(*self, slice.try_into().unwrap()) + .store_array_f32x4(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: f32) -> Self { @@ -332,7 +332,7 @@ impl SimdBase for i8x16 { #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { self.simd - .store_array_i8x16(*self, slice.try_into().unwrap()) + .store_array_i8x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i8) -> Self { @@ -498,7 +498,7 @@ impl SimdBase for u8x16 { #[inline(always)] fn store_slice(&self, slice: &mut [u8]) { self.simd - .store_array_u8x16(*self, slice.try_into().unwrap()) + .store_array_u8x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u8) -> Self { @@ -669,7 +669,7 @@ impl SimdBase for mask8x16 { #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { self.simd - .store_array_mask8x16(*self, slice.try_into().unwrap()) + .store_array_mask8x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i8) -> Self { @@ -811,7 +811,7 @@ impl SimdBase for i16x8 { #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { self.simd - .store_array_i16x8(*self, slice.try_into().unwrap()) + .store_array_i16x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i16) -> Self { @@ -977,7 +977,7 @@ impl 
SimdBase for u16x8 { #[inline(always)] fn store_slice(&self, slice: &mut [u16]) { self.simd - .store_array_u16x8(*self, slice.try_into().unwrap()) + .store_array_u16x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u16) -> Self { @@ -1148,7 +1148,7 @@ impl SimdBase for mask16x8 { #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { self.simd - .store_array_mask16x8(*self, slice.try_into().unwrap()) + .store_array_mask16x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i16) -> Self { @@ -1290,7 +1290,7 @@ impl SimdBase for i32x4 { #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { self.simd - .store_array_i32x4(*self, slice.try_into().unwrap()) + .store_array_i32x4(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i32) -> Self { @@ -1468,7 +1468,7 @@ impl SimdBase for u32x4 { #[inline(always)] fn store_slice(&self, slice: &mut [u32]) { self.simd - .store_array_u32x4(*self, slice.try_into().unwrap()) + .store_array_u32x4(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u32) -> Self { @@ -1651,7 +1651,7 @@ impl SimdBase for mask32x4 { #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { self.simd - .store_array_mask32x4(*self, slice.try_into().unwrap()) + .store_array_mask32x4(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i32) -> Self { @@ -1793,7 +1793,7 @@ impl SimdBase for f64x2 { #[inline(always)] fn store_slice(&self, slice: &mut [f64]) { self.simd - .store_array_f64x2(*self, slice.try_into().unwrap()) + .store_array_f64x2(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: f64) -> Self { @@ -2014,7 +2014,7 @@ impl SimdBase for mask64x2 { #[inline(always)] fn store_slice(&self, slice: &mut [i64]) { self.simd - .store_array_mask64x2(*self, slice.try_into().unwrap()) + .store_array_mask64x2(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i64) -> Self { @@ -2156,7 +2156,7 @@ impl SimdBase for f32x8 { #[inline(always)] fn store_slice(&self, slice: &mut [f32]) { self.simd - .store_array_f32x8(*self, slice.try_into().unwrap()) + .store_array_f32x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: f32) -> Self { @@ -2393,7 +2393,7 @@ impl SimdBase for i8x32 { #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { self.simd - .store_array_i8x32(*self, slice.try_into().unwrap()) + .store_array_i8x32(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i8) -> Self { @@ -2566,7 +2566,7 @@ impl SimdBase for u8x32 { #[inline(always)] fn store_slice(&self, slice: &mut [u8]) { self.simd - .store_array_u8x32(*self, slice.try_into().unwrap()) + .store_array_u8x32(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u8) -> Self { @@ -2744,7 +2744,7 @@ impl SimdBase for mask8x32 { #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { self.simd - .store_array_mask8x32(*self, slice.try_into().unwrap()) + .store_array_mask8x32(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i8) -> Self { @@ -2898,7 +2898,7 @@ impl SimdBase for i16x16 { #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { self.simd - .store_array_i16x16(*self, slice.try_into().unwrap()) + .store_array_i16x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i16) -> Self { @@ -3076,7 +3076,7 @@ impl SimdBase for u16x16 { #[inline(always)] fn store_slice(&self, 
slice: &mut [u16]) { self.simd - .store_array_u16x16(*self, slice.try_into().unwrap()) + .store_array_u16x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u16) -> Self { @@ -3254,7 +3254,7 @@ impl SimdBase for mask16x16 { #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { self.simd - .store_array_mask16x16(*self, slice.try_into().unwrap()) + .store_array_mask16x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i16) -> Self { @@ -3403,7 +3403,7 @@ impl SimdBase for i32x8 { #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { self.simd - .store_array_i32x8(*self, slice.try_into().unwrap()) + .store_array_i32x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i32) -> Self { @@ -3588,7 +3588,7 @@ impl SimdBase for u32x8 { #[inline(always)] fn store_slice(&self, slice: &mut [u32]) { self.simd - .store_array_u32x8(*self, slice.try_into().unwrap()) + .store_array_u32x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u32) -> Self { @@ -3778,7 +3778,7 @@ impl SimdBase for mask32x8 { #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { self.simd - .store_array_mask32x8(*self, slice.try_into().unwrap()) + .store_array_mask32x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i32) -> Self { @@ -3927,7 +3927,7 @@ impl SimdBase for f64x4 { #[inline(always)] fn store_slice(&self, slice: &mut [f64]) { self.simd - .store_array_f64x4(*self, slice.try_into().unwrap()) + .store_array_f64x4(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: f64) -> Self { @@ -4155,7 +4155,7 @@ impl SimdBase for mask64x4 { #[inline(always)] fn store_slice(&self, slice: &mut [i64]) { self.simd - .store_array_mask64x4(*self, slice.try_into().unwrap()) + .store_array_mask64x4(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i64) -> Self { @@ -4309,7 +4309,7 @@ impl SimdBase for f32x16 { #[inline(always)] fn store_slice(&self, slice: &mut [f32]) { self.simd - .store_array_f32x16(*self, slice.try_into().unwrap()) + .store_array_f32x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: f32) -> Self { @@ -4540,7 +4540,7 @@ impl SimdBase for i8x64 { #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { self.simd - .store_array_i8x64(*self, slice.try_into().unwrap()) + .store_array_i8x64(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i8) -> Self { @@ -4707,7 +4707,7 @@ impl SimdBase for u8x64 { #[inline(always)] fn store_slice(&self, slice: &mut [u8]) { self.simd - .store_array_u8x64(*self, slice.try_into().unwrap()) + .store_array_u8x64(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u8) -> Self { @@ -4879,7 +4879,7 @@ impl SimdBase for mask8x64 { #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { self.simd - .store_array_mask8x64(*self, slice.try_into().unwrap()) + .store_array_mask8x64(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i8) -> Self { @@ -5027,7 +5027,7 @@ impl SimdBase for i16x32 { #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { self.simd - .store_array_i16x32(*self, slice.try_into().unwrap()) + .store_array_i16x32(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i16) -> Self { @@ -5199,7 +5199,7 @@ impl SimdBase for u16x32 { #[inline(always)] fn store_slice(&self, slice: &mut [u16]) { self.simd - 
.store_array_u16x32(*self, slice.try_into().unwrap()) + .store_array_u16x32(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u16) -> Self { @@ -5371,7 +5371,7 @@ impl SimdBase for mask16x32 { #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { self.simd - .store_array_mask16x32(*self, slice.try_into().unwrap()) + .store_array_mask16x32(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i16) -> Self { @@ -5519,7 +5519,7 @@ impl SimdBase for i32x16 { #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { self.simd - .store_array_i32x16(*self, slice.try_into().unwrap()) + .store_array_i32x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i32) -> Self { @@ -5703,7 +5703,7 @@ impl SimdBase for u32x16 { #[inline(always)] fn store_slice(&self, slice: &mut [u32]) { self.simd - .store_array_u32x16(*self, slice.try_into().unwrap()) + .store_array_u32x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: u32) -> Self { @@ -5887,7 +5887,7 @@ impl SimdBase for mask32x16 { #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { self.simd - .store_array_mask32x16(*self, slice.try_into().unwrap()) + .store_array_mask32x16(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i32) -> Self { @@ -6030,7 +6030,7 @@ impl SimdBase for f64x8 { #[inline(always)] fn store_slice(&self, slice: &mut [f64]) { self.simd - .store_array_f64x8(*self, slice.try_into().unwrap()) + .store_array_f64x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: f64) -> Self { @@ -6252,7 +6252,7 @@ impl SimdBase for mask64x8 { #[inline(always)] fn store_slice(&self, slice: &mut [i64]) { self.simd - .store_array_mask64x8(*self, slice.try_into().unwrap()) + .store_array_mask64x8(*self, slice.try_into().unwrap()); } #[inline(always)] fn splat(simd: S, val: i64) -> Self { diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index 46b4c386..4dd9f0c2 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -322,7 +322,7 @@ fn simd_vec_impl(ty: &VecType) -> TokenStream { #[inline(always)] fn store_slice(&self, slice: &mut [#scalar]) { - self.simd.#store_array_op(*self, slice.try_into().unwrap()) + self.simd.#store_array_op(*self, slice.try_into().unwrap()); } #[inline(always)] From bf84c30b42d7804cc4513fdf3efca44853f9b8db Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sat, 3 Jan 2026 19:10:39 +0100 Subject: [PATCH 5/9] Clippy --- fearless_simd_tests/tests/harness/mod.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index a78a28a2..fb4bd52c 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -3094,7 +3094,7 @@ fn store_array_u32x8(simd: S) { #[simd_test] fn store_array_f32x16(simd: S) { - let data: [f32; 16] = core::array::from_fn(|i| (i + 1) as f32); + let data: [f32; 16] = core::array::from_fn(|i| i as f32); let a = f32x16::from_slice(simd, &data); let mut dest = [0.0_f32; 16]; simd.store_array_f32x16(a, &mut dest); @@ -3103,7 +3103,7 @@ fn store_array_f32x16(simd: S) { #[simd_test] fn store_array_f64x8(simd: S) { - let data: [f64; 8] = core::array::from_fn(|i| (i + 1) as f64); + let data: [f64; 8] = core::array::from_fn(|i| i as f64); let a = f64x8::from_slice(simd, &data); let mut dest = 
[0.0_f64; 8]; simd.store_array_f64x8(a, &mut dest); @@ -3112,7 +3112,7 @@ fn store_array_f64x8(simd: S) { #[simd_test] fn store_array_i8x32(simd: S) { - let data: [i8; 32] = core::array::from_fn(|i| (i + 1) as i8); + let data: [i8; 32] = core::array::from_fn(|i| i as i8); let a = i8x32::from_slice(simd, &data); let mut dest = [0_i8; 32]; simd.store_array_i8x32(a, &mut dest); @@ -3121,7 +3121,7 @@ fn store_array_i8x32(simd: S) { #[simd_test] fn store_array_i8x64(simd: S) { - let data: [i8; 64] = core::array::from_fn(|i| (i + 1) as i8); + let data: [i8; 64] = core::array::from_fn(|i| i as i8); let a = i8x64::from_slice(simd, &data); let mut dest = [0_i8; 64]; simd.store_array_i8x64(a, &mut dest); @@ -3130,7 +3130,7 @@ fn store_array_i8x64(simd: S) { #[simd_test] fn store_array_i16x16(simd: S) { - let data: [i16; 16] = core::array::from_fn(|i| (i + 1) as i16); + let data: [i16; 16] = core::array::from_fn(|i| i as i16); let a = i16x16::from_slice(simd, &data); let mut dest = [0_i16; 16]; simd.store_array_i16x16(a, &mut dest); @@ -3139,7 +3139,7 @@ fn store_array_i16x16(simd: S) { #[simd_test] fn store_array_i16x32(simd: S) { - let data: [i16; 32] = core::array::from_fn(|i| (i + 1) as i16); + let data: [i16; 32] = core::array::from_fn(|i| i as i16); let a = i16x32::from_slice(simd, &data); let mut dest = [0_i16; 32]; simd.store_array_i16x32(a, &mut dest); @@ -3148,7 +3148,7 @@ fn store_array_i16x32(simd: S) { #[simd_test] fn store_array_i32x16(simd: S) { - let data: [i32; 16] = core::array::from_fn(|i| (i + 1) as i32); + let data: [i32; 16] = core::array::from_fn(|i| i as i32); let a = i32x16::from_slice(simd, &data); let mut dest = [0_i32; 16]; simd.store_array_i32x16(a, &mut dest); @@ -3157,7 +3157,7 @@ fn store_array_i32x16(simd: S) { #[simd_test] fn store_array_u8x32(simd: S) { - let data: [u8; 32] = core::array::from_fn(|i| (i + 1) as u8); + let data: [u8; 32] = core::array::from_fn(|i| i as u8); let a = u8x32::from_slice(simd, &data); let mut dest = [0_u8; 32]; simd.store_array_u8x32(a, &mut dest); @@ -3166,7 +3166,7 @@ fn store_array_u8x32(simd: S) { #[simd_test] fn store_array_u8x64(simd: S) { - let data: [u8; 64] = core::array::from_fn(|i| (i + 1) as u8); + let data: [u8; 64] = core::array::from_fn(|i| i as u8); let a = u8x64::from_slice(simd, &data); let mut dest = [0_u8; 64]; simd.store_array_u8x64(a, &mut dest); @@ -3175,7 +3175,7 @@ fn store_array_u8x64(simd: S) { #[simd_test] fn store_array_u16x16(simd: S) { - let data: [u16; 16] = core::array::from_fn(|i| (i + 1) as u16); + let data: [u16; 16] = core::array::from_fn(|i| i as u16); let a = u16x16::from_slice(simd, &data); let mut dest = [0_u16; 16]; simd.store_array_u16x16(a, &mut dest); @@ -3184,7 +3184,7 @@ fn store_array_u16x16(simd: S) { #[simd_test] fn store_array_u16x32(simd: S) { - let data: [u16; 32] = core::array::from_fn(|i| (i + 1) as u16); + let data: [u16; 32] = core::array::from_fn(|i| i as u16); let a = u16x32::from_slice(simd, &data); let mut dest = [0_u16; 32]; simd.store_array_u16x32(a, &mut dest); @@ -3193,7 +3193,7 @@ fn store_array_u16x32(simd: S) { #[simd_test] fn store_array_u32x16(simd: S) { - let data: [u32; 16] = core::array::from_fn(|i| (i + 1) as u32); + let data: [u32; 16] = core::array::from_fn(|i| i as u32); let a = u32x16::from_slice(simd, &data); let mut dest = [0_u32; 16]; simd.store_array_u32x16(a, &mut dest); From c4c19c3710b301a92b7d82047442aaf909497443 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sat, 3 Jan 2026 19:41:01 +0100 Subject: [PATCH 6/9] Clippy --- 
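Note (an illustrative sketch, not part of the change itself): the hunks below replace the remaining `i as iN` / `i as uN` casts in the test-data closures with checked conversions. An `as` cast from `usize` silently wraps when the value does not fit the target type, which clippy lints such as `cast_possible_truncation` flag; `try_from(..).unwrap()` panics instead:

    let i: usize = 300;                      // hypothetical out-of-range index
    let wrapped = i as i8;                   // 44: the high bits are silently discarded
    let checked = i8::try_from(i).unwrap();  // panics: 300 does not fit in i8

The indices in these tests never exceed 63, so both forms produce identical data; the checked form just makes that bound explicit.
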
fearless_simd_tests/tests/harness/mod.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index fb4bd52c..3b618e87 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -3112,7 +3112,7 @@ fn store_array_f64x8(simd: S) { #[simd_test] fn store_array_i8x32(simd: S) { - let data: [i8; 32] = core::array::from_fn(|i| i as i8); + let data: [i8; 32] = core::array::from_fn(|i| i8::try_from(i).unwrap()); let a = i8x32::from_slice(simd, &data); let mut dest = [0_i8; 32]; simd.store_array_i8x32(a, &mut dest); @@ -3121,7 +3121,7 @@ fn store_array_i8x32(simd: S) { #[simd_test] fn store_array_i8x64(simd: S) { - let data: [i8; 64] = core::array::from_fn(|i| i as i8); + let data: [i8; 64] = core::array::from_fn(|i| i8::try_from(i).unwrap()); let a = i8x64::from_slice(simd, &data); let mut dest = [0_i8; 64]; simd.store_array_i8x64(a, &mut dest); @@ -3130,7 +3130,7 @@ fn store_array_i8x64(simd: S) { #[simd_test] fn store_array_i16x16(simd: S) { - let data: [i16; 16] = core::array::from_fn(|i| i as i16); + let data: [i16; 16] = core::array::from_fn(|i| i16::try_from(i).unwrap()); let a = i16x16::from_slice(simd, &data); let mut dest = [0_i16; 16]; simd.store_array_i16x16(a, &mut dest); @@ -3139,7 +3139,7 @@ fn store_array_i16x16(simd: S) { #[simd_test] fn store_array_i16x32(simd: S) { - let data: [i16; 32] = core::array::from_fn(|i| i as i16); + let data: [i16; 32] = core::array::from_fn(|i| i16::try_from(i).unwrap()); let a = i16x32::from_slice(simd, &data); let mut dest = [0_i16; 32]; simd.store_array_i16x32(a, &mut dest); @@ -3148,7 +3148,7 @@ fn store_array_i16x32(simd: S) { #[simd_test] fn store_array_i32x16(simd: S) { - let data: [i32; 16] = core::array::from_fn(|i| i as i32); + let data: [i32; 16] = core::array::from_fn(|i| i32::try_from(i).unwrap()); let a = i32x16::from_slice(simd, &data); let mut dest = [0_i32; 16]; simd.store_array_i32x16(a, &mut dest); @@ -3157,7 +3157,7 @@ fn store_array_i32x16(simd: S) { #[simd_test] fn store_array_u8x32(simd: S) { - let data: [u8; 32] = core::array::from_fn(|i| i as u8); + let data: [u8; 32] = core::array::from_fn(|i| u8::try_from(i).unwrap()); let a = u8x32::from_slice(simd, &data); let mut dest = [0_u8; 32]; simd.store_array_u8x32(a, &mut dest); @@ -3166,7 +3166,7 @@ fn store_array_u8x32(simd: S) { #[simd_test] fn store_array_u8x64(simd: S) { - let data: [u8; 64] = core::array::from_fn(|i| i as u8); + let data: [u8; 64] = core::array::from_fn(|i| u8::try_from(i).unwrap()); let a = u8x64::from_slice(simd, &data); let mut dest = [0_u8; 64]; simd.store_array_u8x64(a, &mut dest); @@ -3175,7 +3175,7 @@ fn store_array_u8x64(simd: S) { #[simd_test] fn store_array_u16x16(simd: S) { - let data: [u16; 16] = core::array::from_fn(|i| i as u16); + let data: [u16; 16] = core::array::from_fn(|i| u16::try_from(i).unwrap()); let a = u16x16::from_slice(simd, &data); let mut dest = [0_u16; 16]; simd.store_array_u16x16(a, &mut dest); @@ -3184,7 +3184,7 @@ fn store_array_u16x16(simd: S) { #[simd_test] fn store_array_u16x32(simd: S) { - let data: [u16; 32] = core::array::from_fn(|i| i as u16); + let data: [u16; 32] = core::array::from_fn(|i| u16::try_from(i).unwrap()); let a = u16x32::from_slice(simd, &data); let mut dest = [0_u16; 32]; simd.store_array_u16x32(a, &mut dest); @@ -3193,7 +3193,7 @@ fn store_array_u16x32(simd: S) { #[simd_test] fn store_array_u32x16(simd: S) { - let data: [u32; 16] = 
core::array::from_fn(|i| i as u32); + let data: [u32; 16] = core::array::from_fn(|i| u32::try_from(i).unwrap()); let a = u32x16::from_slice(simd, &data); let mut dest = [0_u32; 16]; simd.store_array_u32x16(a, &mut dest); From bbe66d5b805210fb30de891de6835ad54320f842 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sun, 18 Jan 2026 15:31:05 +0100 Subject: [PATCH 7/9] Rename `ToArray` to `StoreArray` --- fearless_simd_gen/src/generic.rs | 2 +- fearless_simd_gen/src/mk_fallback.rs | 2 +- fearless_simd_gen/src/mk_neon.rs | 2 +- fearless_simd_gen/src/mk_wasm.rs | 8 +++++--- fearless_simd_gen/src/mk_x86.rs | 2 +- fearless_simd_gen/src/ops.rs | 16 ++++++++-------- 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/fearless_simd_gen/src/generic.rs b/fearless_simd_gen/src/generic.rs index a92230c4..b5afd39c 100644 --- a/fearless_simd_gen/src/generic.rs +++ b/fearless_simd_gen/src/generic.rs @@ -199,7 +199,7 @@ pub(crate) fn generic_op(op: &Op, ty: &VecType) -> TokenStream { | OpSig::Combine { .. } | OpSig::AsArray { .. } | OpSig::FromArray { .. } - | OpSig::ToArray => { + | OpSig::StoreArray => { panic!("These operations require more information about the target platform"); } OpSig::FromBytes => generic_from_bytes(method_sig, ty), diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index d1d65cae..c6c95533 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -488,7 +488,7 @@ impl Level for Fallback { } } } - OpSig::ToArray => { + OpSig::StoreArray => { quote! { #method_sig { *dest = a.val.0; diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index dc2b56b2..c70fdc55 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -474,7 +474,7 @@ impl Level for Neon { self.arch_ty(vec_ty) }) } - OpSig::ToArray => { + OpSig::StoreArray => { generic_to_array(method_sig, vec_ty, self.max_block_size(), store_intrinsic) } OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index f3ae8ac7..a4b01b81 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -623,9 +623,11 @@ impl Level for WasmSimd128 { Ident::new("v128", Span::call_site()) }) } - OpSig::ToArray => generic_to_array(method_sig, vec_ty, self.max_block_size(), |_| { - v128_intrinsic("store") - }), + OpSig::StoreArray => { + generic_to_array(method_sig, vec_ty, self.max_block_size(), |_| { + v128_intrinsic("store") + }) + } OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index 7c7ed34e..2b780447 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -182,7 +182,7 @@ impl Level for X86 { self.arch_ty(vec_ty) }) } - OpSig::ToArray => { + OpSig::StoreArray => { generic_to_array(method_sig, vec_ty, self.max_block_size(), |block_ty| { intrinsic_ident("storeu", coarse_type(block_ty), block_ty.n_bits()) }) diff --git a/fearless_simd_gen/src/ops.rs b/fearless_simd_gen/src/ops.rs index 6ddeed09..a7b50452 100644 --- a/fearless_simd_gen/src/ops.rs +++ b/fearless_simd_gen/src/ops.rs @@ -107,7 +107,7 @@ pub(crate) enum OpSig { /// `[f32; 4]` for `f32x4`) or a reference to it. AsArray { kind: RefKind }, /// Takes a vector and a mutable reference to an array, and stores the vector elements into the array. 
- ToArray, + StoreArray, /// Takes a single argument of the vector type, and returns a vector type with `u8` elements and the same bit width. FromBytes, /// Takes a single argument of a vector type with `u8` elements, and returns a vector type with different elements @@ -273,7 +273,7 @@ impl Op { let array_ty = quote! { [#rust_scalar; #len] }; quote! { (self, #arg0: #ref_tok #ty) -> #ref_tok #array_ty } } - OpSig::ToArray => { + OpSig::StoreArray => { let arg0 = &arg_names[0]; let arg1 = &arg_names[1]; let rust_scalar = vec_ty.scalar.rust(vec_ty.scalar_bits); @@ -310,7 +310,7 @@ impl Op { OpSig::Splat | OpSig::LoadInterleaved { .. } | OpSig::StoreInterleaved { .. } - | OpSig::ToArray => { + | OpSig::StoreArray => { return None; } OpSig::Unary @@ -450,7 +450,7 @@ const BASE_OPS: &[Op] = &[ Op::new( "store_array", OpKind::AssociatedOnly, - OpSig::ToArray, + OpSig::StoreArray, "Store a SIMD vector into an array of the same length.", ), Op::new( @@ -1301,7 +1301,7 @@ impl OpSig { | Self::StoreInterleaved { .. } | Self::FromArray { .. } | Self::AsArray { .. } - | Self::ToArray + | Self::StoreArray ) { return false; } @@ -1334,7 +1334,7 @@ impl OpSig { Self::Ternary | Self::Select => &["a", "b", "c"], Self::Shift => &["a", "shift"], Self::LoadInterleaved { .. } => &["src"], - Self::StoreInterleaved { .. } | Self::ToArray => &["a", "dest"], + Self::StoreInterleaved { .. } | Self::StoreArray => &["a", "dest"], } } fn vec_trait_arg_names(&self) -> &'static [&'static str] { @@ -1344,7 +1344,7 @@ impl OpSig { | Self::StoreInterleaved { .. } | Self::FromArray { .. } | Self::FromBytes { .. } - | Self::ToArray => &[], + | Self::StoreArray => &[], Self::Unary | Self::Cvt { .. } | Self::Reinterpret { .. } @@ -1398,7 +1398,7 @@ impl OpSig { | Self::StoreInterleaved { .. } | Self::FromArray { .. } | Self::AsArray { .. 
} - | Self::ToArray + | Self::StoreArray | Self::FromBytes | Self::ToBytes => return None, }; From 1452ac082fa5a5c57236efe9ddcb631b13cb54fb Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sun, 18 Jan 2026 15:32:03 +0100 Subject: [PATCH 8/9] Rename `generic_to_array` --- fearless_simd_gen/src/generic.rs | 2 +- fearless_simd_gen/src/mk_neon.rs | 4 ++-- fearless_simd_gen/src/mk_wasm.rs | 4 ++-- fearless_simd_gen/src/mk_x86.rs | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fearless_simd_gen/src/generic.rs b/fearless_simd_gen/src/generic.rs index b5afd39c..892c4bce 100644 --- a/fearless_simd_gen/src/generic.rs +++ b/fearless_simd_gen/src/generic.rs @@ -333,7 +333,7 @@ pub(crate) fn generic_as_array( } } -pub(crate) fn generic_to_array( +pub(crate) fn generic_store_array( method_sig: TokenStream, vec_ty: &VecType, max_block_size: usize, diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index c70fdc55..366359e1 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -6,7 +6,7 @@ use quote::{ToTokens as _, format_ident, quote}; use crate::arch::neon::{load_intrinsic, store_intrinsic}; use crate::generic::{ - generic_as_array, generic_from_array, generic_from_bytes, generic_op_name, generic_to_array, + generic_as_array, generic_from_array, generic_from_bytes, generic_op_name, generic_store_array, generic_to_bytes, }; use crate::level::Level; @@ -475,7 +475,7 @@ impl Level for Neon { }) } OpSig::StoreArray => { - generic_to_array(method_sig, vec_ty, self.max_block_size(), store_intrinsic) + generic_store_array(method_sig, vec_ty, self.max_block_size(), store_intrinsic) } OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index a4b01b81..d098d765 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -7,7 +7,7 @@ use quote::{format_ident, quote}; use crate::arch::wasm::{arch_prefix, v128_intrinsic}; use crate::generic::{ generic_as_array, generic_block_combine, generic_block_split, generic_from_array, - generic_from_bytes, generic_op_name, generic_to_array, generic_to_bytes, scalar_binary, + generic_from_bytes, generic_op_name, generic_store_array, generic_to_bytes, scalar_binary, }; use crate::level::Level; use crate::ops::{Op, Quantifier, valid_reinterpret}; @@ -624,7 +624,7 @@ impl Level for WasmSimd128 { }) } OpSig::StoreArray => { - generic_to_array(method_sig, vec_ty, self.max_block_size(), |_| { + generic_store_array(method_sig, vec_ty, self.max_block_size(), |_| { v128_intrinsic("store") }) } diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index 2b780447..90700218 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -8,7 +8,7 @@ use crate::arch::x86::{ }; use crate::generic::{ generic_as_array, generic_block_combine, generic_block_split, generic_from_array, - generic_from_bytes, generic_op_name, generic_to_array, generic_to_bytes, scalar_binary, + generic_from_bytes, generic_op_name, generic_store_array, generic_to_bytes, scalar_binary, }; use crate::level::Level; use crate::ops::{Op, OpSig, Quantifier, valid_reinterpret}; @@ -183,7 +183,7 @@ impl Level for X86 { }) } OpSig::StoreArray => { - generic_to_array(method_sig, vec_ty, self.max_block_size(), |block_ty| { + generic_store_array(method_sig, vec_ty, self.max_block_size(), |block_ty| { 
intrinsic_ident("storeu", coarse_type(block_ty), block_ty.n_bits()) }) } From 85e217ef2b9538c7ccba8be22333f08e8d555378 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Sun, 18 Jan 2026 15:34:25 +0100 Subject: [PATCH 9/9] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6567c727..6a4f13ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ This release has an [MSRV][] of 1.88. - A "shift left by vector" operation, to go with the existing "shift right by vector". ([#155][] by [@valadaptive][]) - "Precise" float-to-integer conversions, which saturate out-of-bounds results and convert NaN to 0 across all platforms. ([#167][] by [@valadaptive][]) - The `Level::is_fallback` method, which lets you check if the current SIMD level is the scalar fallback. This works even if `Level::Fallback` is not compiled in, always returning false in that case. ([#168][] by [@valadaptive][]) +- Added `store_array` methods to store SIMD vectors back to memory explicitly using intrinsics. ([#181][] by [@LaurenzV][]) ### Fixed @@ -127,6 +128,7 @@ No changelog was kept for this release. [@Ralith]: https://github.com/Ralith [@DJMcNab]: https://github.com/DJMcNab [@valadaptive]: https://github.com/valadaptive +[@LaurenzV]: https://github.com/LaurenzV [@Shnatsel]: https://github.com/Shnatsel [#75]: https://github.com/linebender/fearless_simd/pull/75