From eaa2808127d1017d737b2e406cb050c9696724ee Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 15 Dec 2025 18:34:01 -0500 Subject: [PATCH 01/10] Add (scalarized) scatter/gather ops --- fearless_simd/src/generated/simd_types.rs | 608 +++++++++++++++++++++- fearless_simd/src/traits.rs | 47 ++ fearless_simd_gen/src/mk_simd_types.rs | 93 +++- 3 files changed, 746 insertions(+), 2 deletions(-) diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index 6768951e..a1012000 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -3,7 +3,10 @@ // This file is autogenerated by fearless_simd_gen -use crate::{Bytes, Select, Simd, SimdBase, SimdCvtFloat, SimdCvtTruncate, SimdFrom, SimdInto}; +use crate::{ + Bytes, Select, Simd, SimdBase, SimdCvtFloat, SimdCvtTruncate, SimdFrom, SimdGather, SimdInto, + SimdScatter, +}; #[doc = "A SIMD vector of 4 [`f32`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, f32x4};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = f32x4::splat(simd, 1.0);\n let b = f32x4::simd_from(simd, 1.0);\n\n // From a slice:\n let c = f32x4::from_slice(simd, &[1.0, 2.0, 3.0, 4.0]);\n\n // From an array:\n let d = f32x4::simd_from(simd, [1.0, 2.0, 3.0, 4.0]);\n\n // From an element-wise function:\n let e = f32x4::from_fn(simd, |i| i as f32);\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(16))] @@ -559,6 +562,73 @@ impl crate::SimdInt for u8x16 { self.simd.max_u8x16(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u8x16 { + type Gathered = [T; 16]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() 
> Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u8x16 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x16( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdCombine for u8x16 { type Combined = u8x32; #[inline(always)] @@ -1038,6 +1108,73 @@ impl crate::SimdInt for u16x8 { self.simd.max_u16x8(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u16x8 { + type Gathered = [T; 8]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + 
self.simd.min_u16x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u16x8 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x8( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdCombine for u16x8 { type Combined = u16x16; #[inline(always)] @@ -1541,6 +1678,73 @@ impl SimdCvtTruncate> for u32x4 { x.simd.cvt_u32_precise_f32x4(x) } } +impl SimdGather for u32x4 { + type Gathered = [T; 4]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x4( + self, + ((src.len() - 1) as 
Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x4( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u32x4 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x4( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdCombine for u32x4 { type Combined = u32x8; #[inline(always)] @@ -2627,6 +2831,73 @@ impl crate::SimdInt for u8x32 { self.simd.max_u8x32(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u8x32 { + type Gathered = [T; 32]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + 
core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u8x32 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x32( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdSplit for u8x32 { type Split = u8x16; #[inline(always)] @@ -3137,6 +3408,73 @@ impl crate::SimdInt for u16x16 { self.simd.max_u16x16(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u16x16 { + type Gathered = [T; 16]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { 
*src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u16x16 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x16( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdSplit for u16x16 { type Split = u16x8; #[inline(always)] @@ -3661,6 +3999,73 @@ impl SimdCvtTruncate> for u32x8 { x.simd.cvt_u32_precise_f32x8(x) } } +impl SimdGather for u32x8 { + type Gathered = [T; 8]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn 
gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x8( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u32x8 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x8( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdSplit for u32x8 { type Split = u32x4; #[inline(always)] @@ -4769,6 +5174,73 @@ impl crate::SimdInt for u8x64 { self.simd.max_u8x64(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u8x64 { + type Gathered = [T; 64]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x64( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + 
assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x64( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u8x64 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u8x64( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdSplit for u8x64 { type Split = u8x32; #[inline(always)] @@ -5261,6 +5733,73 @@ impl crate::SimdInt for u16x32 { self.simd.max_u16x32(self, rhs.simd_into(self.simd)) } } +impl SimdGather for u16x32 { + type Gathered = [T; 32]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: 
destination slice must have the same element count as the vector type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x32( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u16x32 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u16x32( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdSplit for u16x32 { type Split = u16x16; #[inline(always)] @@ -5777,6 +6316,73 @@ impl SimdCvtTruncate> for u32x16 { x.simd.cvt_u32_precise_f32x16(x) } } +impl SimdGather for u32x16 { + type Gathered = [T; 16]; + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + } + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + dst.len(), + "gather_into: destination slice must have the same element count as the vector 
type" + ); + assert!( + !src.is_empty(), + "gather_into: source slice must not be empty" + ); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x16( + self, + ((src.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + } + } +} +impl SimdScatter for u32x16 { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!( + Self::N, + src.len(), + "scatter: source slice must have the same element count as the vector type" + ); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + && dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.min_u32x16( + self, + ((dst.len() - 1) as Self::Element).simd_into(self.simd), + ) + }; + for i in 0..Self::N { + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } +} impl crate::SimdSplit for u32x16 { type Split = u32x8; #[inline(always)] diff --git a/fearless_simd/src/traits.rs b/fearless_simd/src/traits.rs index 76e7039f..3eee4e35 100644 --- a/fearless_simd/src/traits.rs +++ b/fearless_simd/src/traits.rs @@ -5,6 +5,7 @@ missing_docs, reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] + use crate::{Level, Simd, SimdBase}; /// Element-wise selection between two SIMD vectors using `self`. @@ -164,3 +165,49 @@ pub trait SimdSplit: SimdBase { /// Split this vector into left and right halves. fn split(self) -> (Self::Split, Self::Split); } + +/// Gathering of elements in a slice, treating each element in the vector as an index. Out-of-bounds +/// indices are clamped to the last element in the slice. +/// +/// Currently, this does not map to hardware "gather" instructions, but does allow you to avoid +/// bounds checks that the compiler is currently not capable of eliding. 
+pub trait SimdGather: SimdBase { + /// The type returned from [`SimdGather::gather`]. This will always be `[T; Self::N]`, but associated constants are currently not powerful enough to express + /// that directly. + type Gathered; + + /// Gather elements from a slice, treating each element in this vector as an index. Returns an + /// array of gathered elements, with the same element count as the vector type. Out-of-bounds + /// indices are clamped to the last element in the slice. + /// + /// Panics if the source slice is empty. + fn gather(self, src: &[T]) -> Self::Gathered; + /// Gather elements from a slice into another slice, treating each element in this vector as an + /// index. + /// + /// Unlike [`SimdGather::gather`], this is "length-erased", and can be used with the + /// native-width associated types on [`Simd`] (e.g. [`Simd::u32s`]). + /// + /// Panics if the source slice is empty, or if the destination slice + /// doesn't have the same element count as this vector. + fn gather_into(self, src: &[T], dst: &mut [T]); +} + +/// Scattering of elements into a slice, treating each element in the vector as an index to write +/// to. Out-of-bounds indices are clamped to the last element in the slice. If multiple indices are +/// identical, the order in which the writes occur is unspecified. +/// +/// Currently, this does not map to hardware "scatter" instructions, but does allow you to avoid +/// bounds checks that the compiler is currently not capable of eliding. +pub trait SimdScatter: SimdBase { + /// Scatter elements from one slice into another, treating each element in this vector as an + /// index into the destination slice. Out-of-bounds indices are clamped to the last element in + /// the slice. + /// + /// Panics if the destination slice is empty, or if the source slice doesn't + /// have the same element count as this vector. 
+ /// + /// If multiple indices are identical, the order in which the writes occur is unspecified. + fn scatter(self, src: &[T], dst: &mut [T]); +} diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index 4dd9f0c2..81b111ea 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -15,7 +15,7 @@ use crate::{ pub(crate) fn mk_simd_types() -> TokenStream { let mut result = quote! { - use crate::{Bytes, Select, Simd, SimdBase, SimdFrom, SimdInto, SimdCvtFloat, SimdCvtTruncate}; + use crate::{Bytes, Select, Simd, SimdBase, SimdFrom, SimdInto, SimdGather, SimdScatter, SimdCvtFloat, SimdCvtTruncate}; }; for ty in SIMD_TYPES { let name = ty.rust(); @@ -63,6 +63,8 @@ pub(crate) fn mk_simd_types() -> TokenStream { }; let impl_block = simd_vec_impl(ty); let mut conditional_impls = Vec::new(); + + // Conversion operations // TODO: Relax `if` clauses once 64-bit integer or 16-bit floats vectors are implemented match ty.scalar { ScalarType::Float if ty.scalar_bits == 32 => { @@ -133,6 +135,94 @@ pub(crate) fn mk_simd_types() -> TokenStream { } _ => {} } + + // Scatter/gather operations + if ty.scalar == ScalarType::Unsigned { + let min_method = generic_op_name("min", ty); + conditional_impls.push(quote! { + impl SimdGather for #name { + type Gathered = [T; #len]; + + #[inline(always)] + fn gather(self, src: &[T]) -> Self::Gathered { + assert!(!src.is_empty(), "gather: source slice must not be empty"); + + // Before ensuring the source slice is bigger than `Self::Element::MAX as usize`, we need to + // make sure that's actually a valid cast. We may eventually get an i64/u64 type, which is + // larger than `usize` on 32-bit platforms. If our `Element` type is wider than `usize`, then + // `Element::MAX` will be larger than any possible slice length. 
+ let inbounds = if core::mem::size_of::() <= core::mem::size_of::() && + src.len() > Self::Element::MAX as usize + { + // The slice is big enough to accept any index. For instance, if this is a vector of `u8`s, + // `Self::Element::MAX` is 255, so the slice must be at least 256 elements long. + self + } else { + // No `max(0)`; we do not implement `SimdGather` for signed integers. + // + // Converting `src.len() - 1` to `Self::Element` will not wrap, because if `src.len() - 1 >= + // Self::Element::MAX`, that means that `src.len() > Self::Element::MAX`, and we take the + // above branch instead. + self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) + }; + + core::array::from_fn(|i| unsafe { + // Safety: All elements of `inbounds` are in [0, src.len()). 0 is a valid index, because we + // asserted that `src` is not empty. + *src.get_unchecked(inbounds[i] as usize) + }) + } + + #[inline(always)] + fn gather_into(self, src: &[T], dst: &mut [T]) { + assert_eq!(Self::N, dst.len(), "gather_into: destination slice must have the same element count as the vector type"); + assert!(!src.is_empty(), "gather_into: source slice must not be empty"); + + // Same logic as for `gather`. See the comments there. + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() && + src.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) + }; + + for i in 0..Self::N { + dst[i] = unsafe { + // Safety: All elements of `inbounds` are in [0, src.len()). 0 is a valid index, because + // we asserted that `src` is not empty. 
+ *src.get_unchecked(inbounds[i] as usize) + } + } + } + } + + impl SimdScatter for #name { + #[inline(always)] + fn scatter(self, src: &[T], dst: &mut [T]) { + assert_eq!(Self::N, src.len(), "scatter: source slice must have the same element count as the vector type"); + assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + + // Same logic as for `gather`, but for `dst`. See the comments there. + let inbounds = if core::mem::size_of::() <= core::mem::size_of::() && + dst.len() > Self::Element::MAX as usize + { + self + } else { + self.simd.#min_method(self, ((dst.len() - 1) as Self::Element).simd_into(self.simd)) + }; + + for i in 0..Self::N { + // Safety: All elements of `inbounds` are in [0, dst.len()). 0 is a valid index, because we + // asserted that `dst` is not empty. + unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + } + } + } + }); + } + + // Split/combine operations if let Some(half_ty) = ty.split_operand() { let half_ty_rust = half_ty.rust(); let split_method = generic_op_name("split", ty); @@ -161,6 +251,7 @@ pub(crate) fn mk_simd_types() -> TokenStream { } }); } + result.extend(quote! 
{ #[doc = #doc] #[derive(Clone, Copy)] From 06c7e9381b4cb63b548381a4dc51102bd426de56 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 23 Dec 2025 15:15:28 -0500 Subject: [PATCH 02/10] Use unchecked indexing in scatter/gather --- fearless_simd/src/generated/simd_types.rs | 117 +++++++++++++++++----- fearless_simd_gen/src/mk_simd_types.rs | 22 ++-- 2 files changed, 105 insertions(+), 34 deletions(-) diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index a1012000..a7d1e3e5 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -577,7 +577,8 @@ impl SimdGather for u8x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -600,8 +601,11 @@ impl SimdGather for u8x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -624,8 +628,11 @@ impl SimdScatter for u8x16 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -1123,7 +1130,8 @@ impl SimdGather for u16x8 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) 
}) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -1146,8 +1154,11 @@ impl SimdGather for u16x8 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -1170,8 +1181,11 @@ impl SimdScatter for u16x8 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -1693,7 +1707,8 @@ impl SimdGather for u32x4 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -1716,8 +1731,11 @@ impl SimdGather for u32x4 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -1740,8 +1758,11 @@ impl SimdScatter for u32x4 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -2846,7 +2867,8 @@ impl SimdGather for u8x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) 
+ let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -2869,8 +2891,11 @@ impl SimdGather for u8x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -2893,8 +2918,11 @@ impl SimdScatter for u8x32 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -3423,7 +3451,8 @@ impl SimdGather for u16x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -3446,8 +3475,11 @@ impl SimdGather for u16x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -3470,8 +3502,11 @@ impl SimdScatter for u16x16 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -4014,7 +4049,8 @@ impl SimdGather for u32x8 { ((src.len() - 1) 
as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -4037,8 +4073,11 @@ impl SimdGather for u32x8 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -4061,8 +4100,11 @@ impl SimdScatter for u32x8 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -5189,7 +5231,8 @@ impl SimdGather for u8x64 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -5212,8 +5255,11 @@ impl SimdGather for u8x64 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -5236,8 +5282,11 @@ impl SimdScatter for u8x64 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + 
*dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -5748,7 +5797,8 @@ impl SimdGather for u16x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -5771,8 +5821,11 @@ impl SimdGather for u16x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -5795,8 +5848,11 @@ impl SimdScatter for u16x32 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } @@ -6331,7 +6387,8 @@ impl SimdGather for u32x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - core::array::from_fn(|i| unsafe { *src.get_unchecked(inbounds[i] as usize) }) + let inbounds = &*inbounds; + core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] fn gather_into(self, src: &[T], dst: &mut [T]) { @@ -6354,8 +6411,11 @@ impl SimdGather for u32x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { *src.get_unchecked(inbounds[i] as usize) } + unsafe { + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + } } } } @@ -6378,8 +6438,11 @@ impl SimdScatter for u32x16 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; + let 
inbounds = &*inbounds; for i in 0..Self::N { - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + unsafe { + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) + }; } } } diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index 81b111ea..7b2d6c1c 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -166,10 +166,12 @@ pub(crate) fn mk_simd_types() -> TokenStream { self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) }; + let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { // Safety: All elements of `inbounds` are in [0, src.len()). 0 is a valid index, because we - // asserted that `src` is not empty. - *src.get_unchecked(inbounds[i] as usize) + // asserted that `src` is not empty. Therefore, the index into `src` is valid. `i` will be + // between [0, Self::N), so the index into `inbounds` is valid. + *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } @@ -187,11 +189,14 @@ pub(crate) fn mk_simd_types() -> TokenStream { self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) }; + let inbounds = &*inbounds; for i in 0..Self::N { - dst[i] = unsafe { + unsafe { // Safety: All elements of `inbounds` are in [0, src.len()). 0 is a valid index, because - // we asserted that `src` is not empty. - *src.get_unchecked(inbounds[i] as usize) + // we asserted that `src` is not empty. Therefore, the index into `src` is valid. `i` + // will be between [0, Self::N), so the index into `inbounds` is valid. The index into + // `dst` is also valid, since we asserted above that `dst.len() == Self::N`. 
+ *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) } } } @@ -212,10 +217,13 @@ pub(crate) fn mk_simd_types() -> TokenStream { self.simd.#min_method(self, ((dst.len() - 1) as Self::Element).simd_into(self.simd)) }; + let inbounds = &*inbounds; for i in 0..Self::N { // Safety: All elements of `inbounds` are in [0, dst.len()). 0 is a valid index, because we - // asserted that `dst` is not empty. - unsafe { *dst.get_unchecked_mut(inbounds[i] as usize) = src[i] }; + // asserted that `dst` is not empty. Therefore, the index into `dst` is valid. `i` will be + // between [0, Self::N), so the index into `inbounds` is valid. The index into `src` is also + // valid, since we asserted above that `src.len() == Self::N`. + unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) }; } } } From 8b82d0cfc5b045b911e8be6e6ed722668281c2d7 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 23 Dec 2025 15:15:57 -0500 Subject: [PATCH 03/10] Fix assert message --- fearless_simd_gen/src/mk_simd_types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index 7b2d6c1c..46e5bd83 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -206,7 +206,7 @@ pub(crate) fn mk_simd_types() -> TokenStream { #[inline(always)] fn scatter(self, src: &[T], dst: &mut [T]) { assert_eq!(Self::N, src.len(), "scatter: source slice must have the same element count as the vector type"); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); + assert!(!dst.is_empty(), "scatter: destination slice must not be empty"); // Same logic as for `gather`, but for `dst`. See the comments there. 
let inbounds = if core::mem::size_of::() <= core::mem::size_of::() && From 16efc6bcd59742192bca9965a0901804f1b2ae88 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Wed, 24 Dec 2025 17:04:04 -0500 Subject: [PATCH 04/10] Update size_of check --- fearless_simd/src/generated/simd_types.rs | 99 ++++++++++++++--------- fearless_simd_gen/src/mk_simd_types.rs | 13 ++- 2 files changed, 69 insertions(+), 43 deletions(-) diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index a7d1e3e5..9c7ce3bf 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -567,7 +567,7 @@ impl SimdGather for u8x16 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -591,7 +591,7 @@ impl SimdGather for u8x16 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -617,8 +617,11 @@ impl SimdScatter for u8x16 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -1120,7 +1123,7 @@ impl SimdGather for u16x8 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if 
core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -1144,7 +1147,7 @@ impl SimdGather for u16x8 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -1170,8 +1173,11 @@ impl SimdScatter for u16x8 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -1697,7 +1703,7 @@ impl SimdGather for u32x4 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -1721,7 +1727,7 @@ impl SimdGather for u32x4 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -1747,8 +1753,11 @@ impl SimdScatter for u32x4 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let 
inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -2857,7 +2866,7 @@ impl SimdGather for u8x32 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -2881,7 +2890,7 @@ impl SimdGather for u8x32 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -2907,8 +2916,11 @@ impl SimdScatter for u8x32 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -3441,7 +3453,7 @@ impl SimdGather for u16x16 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -3465,7 +3477,7 @@ impl SimdGather for u16x16 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -3491,8 +3503,11 @@ impl SimdScatter for u16x16 { 
src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -4039,7 +4054,7 @@ impl SimdGather for u32x8 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -4063,7 +4078,7 @@ impl SimdGather for u32x8 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -4089,8 +4104,11 @@ impl SimdScatter for u32x8 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -5221,7 +5239,7 @@ impl SimdGather for u8x64 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -5245,7 +5263,7 @@ impl SimdGather 
for u8x64 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -5271,8 +5289,11 @@ impl SimdScatter for u8x64 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -5787,7 +5808,7 @@ impl SimdGather for u16x32 { #[inline(always)] fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -5811,7 +5832,7 @@ impl SimdGather for u16x32 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -5837,8 +5858,11 @@ impl SimdScatter for u16x32 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self @@ -6377,7 +6401,7 @@ impl SimdGather for u32x16 { #[inline(always)] fn gather(self, src: 
&[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -6401,7 +6425,7 @@ impl SimdGather for u32x16 { !src.is_empty(), "gather_into: source slice must not be empty" ); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -6427,8 +6451,11 @@ impl SimdScatter for u32x16 { src.len(), "scatter: source slice must have the same element count as the vector type" ); - assert!(!dst.is_empty(), "scatter: source slice must not be empty"); - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() + assert!( + !dst.is_empty(), + "scatter: destination slice must not be empty" + ); + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index 46e5bd83..14ee072e 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -147,11 +147,10 @@ pub(crate) fn mk_simd_types() -> TokenStream { fn gather(self, src: &[T]) -> Self::Gathered { assert!(!src.is_empty(), "gather: source slice must not be empty"); - // Before ensuring the source slice is bigger than `Self::Element::MAX as usize`, we need to - // make sure that's actually a valid cast. We may eventually get an i64/u64 type, which is - // larger than `usize` on 32-bit platforms. If our `Element` type is wider than `usize`, then - // `Element::MAX` will be larger than any possible slice length. 
- let inbounds = if core::mem::size_of::() <= core::mem::size_of::() && + // Check if the element type is small enough that the slice's length could (and then does) + // exceed its maximum value. The `size_of` check ensures that `Self::Element::MAX as usize` will + // never truncate/wrap. + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { // The slice is big enough to accept any index. For instance, if this is a vector of `u8`s, @@ -181,7 +180,7 @@ pub(crate) fn mk_simd_types() -> TokenStream { assert!(!src.is_empty(), "gather_into: source slice must not be empty"); // Same logic as for `gather`. See the comments there. - let inbounds = if core::mem::size_of::() <= core::mem::size_of::() && + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && src.len() > Self::Element::MAX as usize { self @@ -209,7 +208,7 @@ pub(crate) fn mk_simd_types() -> TokenStream { assert!(!dst.is_empty(), "scatter: destination slice must not be empty"); // Same logic as for `gather`, but for `dst`. See the comments there. 
- let inbounds = if core::mem::size_of::() <= core::mem::size_of::() && + let inbounds = if core::mem::size_of::() < core::mem::size_of::() && dst.len() > Self::Element::MAX as usize { self From e53ee9da43b87285873f1db0395fadcf22b3bec9 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Wed, 24 Dec 2025 17:07:17 -0500 Subject: [PATCH 05/10] the blasted paperclip --- fearless_simd/src/generated/simd_types.rs | 54 +++++++++++------------ fearless_simd_gen/src/mk_simd_types.rs | 14 +++--- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index 9c7ce3bf..79bd225e 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -604,7 +604,7 @@ impl SimdGather for u8x16 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -634,8 +634,8 @@ impl SimdScatter for u8x16 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -1160,7 +1160,7 @@ impl SimdGather for u16x8 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -1190,8 +1190,8 @@ impl SimdScatter for u16x8 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -1740,7 +1740,7 @@ impl SimdGather for u32x4 { let inbounds = 
&*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -1770,8 +1770,8 @@ impl SimdScatter for u32x4 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -2903,7 +2903,7 @@ impl SimdGather for u8x32 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -2933,8 +2933,8 @@ impl SimdScatter for u8x32 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -3490,7 +3490,7 @@ impl SimdGather for u16x16 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -3520,8 +3520,8 @@ impl SimdScatter for u16x16 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -4091,7 +4091,7 @@ impl SimdGather for u32x8 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -4121,8 +4121,8 @@ 
impl SimdScatter for u32x8 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -5276,7 +5276,7 @@ impl SimdGather for u8x64 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -5306,8 +5306,8 @@ impl SimdScatter for u8x64 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -5845,7 +5845,7 @@ impl SimdGather for u16x32 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -5875,8 +5875,8 @@ impl SimdScatter for u16x32 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } @@ -6438,7 +6438,7 @@ impl SimdGather for u32x16 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -6468,8 +6468,8 @@ impl SimdScatter for u32x16 { let inbounds = &*inbounds; for i in 0..Self::N { unsafe { - *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) - }; + *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = 
*src.get_unchecked(i); + } } } } diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index 14ee072e..bf6bf699 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -195,7 +195,7 @@ pub(crate) fn mk_simd_types() -> TokenStream { // we asserted that `src` is not empty. Therefore, the index into `src` is valid. `i` // will be between [0, Self::N), so the index into `inbounds` is valid. The index into // `dst` is also valid, since we asserted above that `dst.len() == Self::N`. - *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize) + *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); } } } @@ -218,11 +218,13 @@ pub(crate) fn mk_simd_types() -> TokenStream { let inbounds = &*inbounds; for i in 0..Self::N { - // Safety: All elements of `inbounds` are in [0, dst.len()). 0 is a valid index, because we - // asserted that `dst` is not empty. Therefore, the index into `dst` is valid. `i` will be - // between [0, Self::N), so the index into `inbounds` is valid. The index into `src` is also - // valid, since we asserted above that `src.len() == Self::N`. - unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i) }; + unsafe { + // Safety: All elements of `inbounds` are in [0, dst.len()). 0 is a valid index, because + // we asserted that `dst` is not empty. Therefore, the index into `dst` is valid. `i` + // will be between [0, Self::N), so the index into `inbounds` is valid. The index into + // `src` is also valid, since we asserted above that `src.len() == Self::N`. 
+ *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); + } } } } From a36e380077e91ad9535c7fa2fb50ffcd3d9ca037 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 26 Dec 2025 07:16:36 -0500 Subject: [PATCH 06/10] Copy over simd_test attrs --- fearless_simd_dev_macros/src/lib.rs | 56 ++++++----------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/fearless_simd_dev_macros/src/lib.rs b/fearless_simd_dev_macros/src/lib.rs index 1cb2aa9c..5ac987bc 100644 --- a/fearless_simd_dev_macros/src/lib.rs +++ b/fearless_simd_dev_macros/src/lib.rs @@ -23,28 +23,15 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { let avx2_name = get_ident("avx2"); let wasm_name = get_ident("wasm"); - let ignore_attr = |f: fn(&str) -> bool| { - let should_ignore = input_fn - .attrs - .iter() - .any(|attr| attr.path().is_ident("ignore")) - || f(&input_fn_name.to_string()); - if should_ignore { - quote! { #[ignore] } - } else { - quote! {} - } - }; - - let ignore_fallback = ignore_attr(exclude_fallback); - let ignore_neon = ignore_attr(exclude_neon); - let ignore_sse4 = ignore_attr(exclude_sse4); - let ignore_avx2 = ignore_attr(exclude_avx2); - let ignore_wasm = ignore_attr(exclude_wasm); + let test_attrs: Vec<_> = input_fn + .attrs + .iter() + .filter(|attr| !attr.path().is_ident("simd_test")) + .collect(); let fallback_snippet = quote! { + #(#test_attrs)* #[test] - #ignore_fallback fn #fallback_name() { let fallback = fearless_simd::Fallback::new(); #input_fn_name(fallback); @@ -63,8 +50,8 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { let neon_snippet = quote! { #[cfg(target_arch = "aarch64")] + #(#test_attrs)* #[test] - #ignore_neon fn #neon_name() { if std::arch::is_aarch64_feature_detected!("neon") { let neon = unsafe { fearless_simd::aarch64::Neon::new_unchecked() }; @@ -75,8 +62,8 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { let sse4_snippet = quote! 
{ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #(#test_attrs)* #[test] - #ignore_sse4 fn #sse4_name() { if std::arch::is_x86_feature_detected!("sse4.2") { let sse4 = unsafe { fearless_simd::x86::Sse4_2::new_unchecked() }; @@ -87,8 +74,8 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { let avx2_snippet = quote! { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #(#test_attrs)* #[test] - #ignore_avx2 fn #avx2_name() { if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") @@ -101,8 +88,8 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { let wasm_snippet = quote! { #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + #(#test_attrs)* #[test] - #ignore_wasm fn #wasm_name() { let wasm = unsafe { fearless_simd::wasm32::WasmSimd128::new_unchecked() }; #input_fn_name(wasm); @@ -120,26 +107,3 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { } .into() } - -// You can update below functions if you want to exclude certain tests from different architectures -// (for example because they haven't been implemented yet). 
- -fn exclude_neon(_test_name: &str) -> bool { - false -} - -fn exclude_fallback(_test_name: &str) -> bool { - false -} - -fn exclude_sse4(_test_name: &str) -> bool { - false -} - -fn exclude_avx2(_test_name: &str) -> bool { - false -} - -fn exclude_wasm(_test_name: &str) -> bool { - false -} From c3aedf274e1665f411bd3b9852635fbadfa2537f Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 26 Dec 2025 07:16:57 -0500 Subject: [PATCH 07/10] Add SimdGather+SimdScatter native-width bounds --- fearless_simd/src/generated/simd_trait.rs | 16 +++++++++++----- fearless_simd_gen/src/mk_simd_trait.rs | 8 ++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fearless_simd/src/generated/simd_trait.rs b/fearless_simd/src/generated/simd_trait.rs index b0732adf..7c6212f7 100644 --- a/fearless_simd/src/generated/simd_trait.rs +++ b/fearless_simd/src/generated/simd_trait.rs @@ -4,8 +4,8 @@ // This file is autogenerated by fearless_simd_gen use crate::{ - Bytes, Level, Select, SimdCvtFloat, SimdCvtTruncate, SimdElement, SimdFrom, SimdInto, - seal::Seal, + Bytes, Level, Select, SimdCvtFloat, SimdCvtTruncate, SimdElement, SimdFrom, SimdGather, + SimdInto, SimdScatter, seal::Seal, }; use crate::{ f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4, @@ -63,7 +63,9 @@ pub trait Simd: #[doc = r" A native-width SIMD vector of [`f64`]s."] type f64s: SimdFloat, Mask = Self::mask64s>; #[doc = r" A native-width SIMD vector of [`u8`]s."] - type u8s: SimdInt, Mask = Self::mask8s>; + type u8s: SimdInt, Mask = Self::mask8s> + + SimdGather + + SimdScatter; #[doc = r" A native-width SIMD vector of [`i8`]s."] type i8s: SimdInt< Self, @@ -73,7 +75,9 @@ pub trait Simd: Bytes = ::Bytes, > + core::ops::Neg; #[doc = r" A native-width SIMD vector of [`u16`]s."] - type u16s: SimdInt, Mask = Self::mask16s>; + type u16s: SimdInt, Mask = Self::mask16s> + + SimdGather + + SimdScatter; #[doc = r" A native-width SIMD vector of [`i16`]s."] type i16s: 
SimdInt< Self, @@ -84,7 +88,9 @@ pub trait Simd: > + core::ops::Neg; #[doc = r" A native-width SIMD vector of [`u32`]s."] type u32s: SimdInt, Mask = Self::mask32s> - + SimdCvtTruncate; + + SimdCvtTruncate + + SimdGather + + SimdScatter; #[doc = r" A native-width SIMD vector of [`i32`]s."] type i32s: SimdInt< Self, diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index bd916e76..d3193fdd 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -24,7 +24,7 @@ pub(crate) fn mk_simd_trait() -> TokenStream { } } let mut code = quote! { - use crate::{seal::Seal, Level, SimdElement, SimdFrom, SimdInto, SimdCvtTruncate, SimdCvtFloat, Select, Bytes}; + use crate::{seal::Seal, Level, SimdElement, SimdFrom, SimdInto, SimdCvtTruncate, SimdCvtFloat, SimdGather, SimdScatter, Select, Bytes}; #imports /// The main SIMD trait, implemented by all SIMD token types. /// @@ -67,15 +67,15 @@ pub(crate) fn mk_simd_trait() -> TokenStream { /// A native-width SIMD vector of [`f64`]s. type f64s: SimdFloat, Mask = Self::mask64s>; /// A native-width SIMD vector of [`u8`]s. - type u8s: SimdInt, Mask = Self::mask8s>; + type u8s: SimdInt, Mask = Self::mask8s> + SimdGather + SimdScatter; /// A native-width SIMD vector of [`i8`]s. type i8s: SimdInt, Mask = Self::mask8s, Bytes = ::Bytes> + core::ops::Neg; /// A native-width SIMD vector of [`u16`]s. - type u16s: SimdInt, Mask = Self::mask16s>; + type u16s: SimdInt, Mask = Self::mask16s> + SimdGather + SimdScatter; /// A native-width SIMD vector of [`i16`]s. type i16s: SimdInt, Mask = Self::mask16s, Bytes = ::Bytes> + core::ops::Neg; /// A native-width SIMD vector of [`u32`]s. - type u32s: SimdInt, Mask = Self::mask32s> + SimdCvtTruncate; + type u32s: SimdInt, Mask = Self::mask32s> + SimdCvtTruncate + SimdGather + SimdScatter; /// A native-width SIMD vector of [`i32`]s. 
type i32s: SimdInt, Mask = Self::mask32s, Bytes = ::Bytes> + SimdCvtTruncate + core::ops::Neg; From 0ef8db05756ec405d887add1ad9b2ca297f4b932 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 26 Dec 2025 07:19:07 -0500 Subject: [PATCH 08/10] Add tests for gather+scatter --- fearless_simd_tests/tests/harness/mod.rs | 361 +++++++++++++++++++++++ 1 file changed, 361 insertions(+) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 3b618e87..a862915d 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -3207,3 +3207,364 @@ fn store_slice_f32x4(simd: S) { a.store_slice(&mut dest); assert_eq!(dest, [1.0, 2.0, 3.0, 4.0]); } + +#[simd_test] +fn gather_u16x8_basic(simd: S) { + let indices = u16x8::from_slice(simd, &[7, 6, 5, 4, 3, 2, 1, 0]); + let src = [100, 200, 300, 400, 500, 600, 700, 800]; + let result = indices.gather(&src); + assert_eq!(result, [800, 700, 600, 500, 400, 300, 200, 100]); +} + +#[simd_test] +fn gather_u32x4_basic(simd: S) { + let indices = u32x4::from_slice(simd, &[3, 1, 2, 0]); + let src = [1000, 2000, 3000, 4000]; + let result = indices.gather(&src); + assert_eq!(result, [4000, 2000, 3000, 1000]); +} + +#[simd_test] +fn gather_with_duplicate_indices(simd: S) { + let indices = u8x16::from_slice(simd, &[0, 0, 1, 1, 2, 2, 3, 3, 0, 1, 2, 3, 0, 1, 2, 3]); + let src = [10, 20, 30, 40]; + let result = indices.gather(&src); + assert_eq!( + result, + [ + 10, 10, 20, 20, 30, 30, 40, 40, 10, 20, 30, 40, 10, 20, 30, 40 + ] + ); +} + +#[simd_test] +fn gather_out_of_bounds_clamping(simd: S) { + // Indices that are out of bounds should be clamped to the last element + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 100, 200, 255, 3, 4, 0, 50, 99, 1, 2, 3, 4, 150], + ); + let src = [10, 20, 30, 40, 50]; + let result = indices.gather(&src); + assert_eq!( + result, + [ + 10, 20, 30, 50, 50, 50, 40, 50, 10, 50, 50, 20, 30, 40, 50, 50 + ] + ); +} + 
+#[simd_test] +fn gather_u8x16_basic(simd: S) { + let indices = u8x16::from_slice( + simd, + &[0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15], + ); + let src = [ + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, + ]; + let result = indices.gather(&src); + assert_eq!( + result, + [ + 10, 30, 50, 70, 90, 110, 130, 150, 20, 40, 60, 80, 100, 120, 140, 160 + ] + ); +} + +#[simd_test] +fn gather_u32x4_out_of_bounds(simd: S) { + let indices = u32x4::from_slice(simd, &[0, 1000, u32::MAX, 2]); + let src = [100, 200, 300]; + let result = indices.gather(&src); + assert_eq!(result, [100, 300, 300, 300]); +} + +#[simd_test] +fn gather_single_element_source(simd: S) { + // All indices should point to the single element + let indices = u8x16::from_slice( + simd, + &[0, 5, 10, 255, 100, 50, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + ); + let src = [42]; + let result = indices.gather(&src); + assert_eq!(result, [42; 16]); +} + +// Note that the #[should_panic] tests are automatically skipped on wasm32-wasip1 with the default runner, which has +// panic=abort. cargo-nextest appears to support these, since it runs each test in its own process. 
+#[simd_test] +#[should_panic(expected = "gather: source slice must not be empty")] +fn gather_empty_source_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src: [i32; 0] = []; + let _result = indices.gather(&src); +} + +#[simd_test] +fn gather_into_u8x16_basic(simd: S) { + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + let src = [100; 20]; + let mut dst = [0; 16]; + indices.gather_into(&src, &mut dst); + assert_eq!(dst, [100; 16]); +} + +#[simd_test] +fn gather_into_u32x4_basic(simd: S) { + let indices = u32x4::from_slice(simd, &[2, 0, 3, 1]); + let src = [10, 20, 30, 40]; + let mut dst = [0; 4]; + indices.gather_into(&src, &mut dst); + assert_eq!(dst, [30, 10, 40, 20]); +} + +#[simd_test] +fn gather_into_with_clamping(simd: S) { + let indices = u16x8::from_slice(simd, &[0, 1, 2, 100, 200, 500, 1000, u16::MAX]); + let src = [5, 10, 15]; + let mut dst = [0; 8]; + indices.gather_into(&src, &mut dst); + // All out-of-bounds indices should clamp to index 2 + assert_eq!(dst, [5, 10, 15, 15, 15, 15, 15, 15]); +} + +#[simd_test] +#[should_panic(expected = "gather_into: source slice must not be empty")] +fn gather_into_empty_source_panics(simd: S) { + let indices = u32x4::splat(simd, 0); + let src: [i32; 0] = []; + let mut dst = [0; 4]; + indices.gather_into(&src, &mut dst); +} + +#[simd_test] +#[should_panic( + expected = "gather_into: destination slice must have the same element count as the vector type" +)] +fn gather_into_wrong_dst_size_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src = [1, 2, 3]; + let mut dst = [0; 8]; // Should be 16 + indices.gather_into(&src, &mut dst); +} + +#[simd_test] +fn scatter_u8x16_basic(simd: S) { + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + let src = [ + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, + ]; + let mut dst = [0; 16]; + indices.scatter(&src, &mut 
dst); + assert_eq!(dst, src); +} + +#[simd_test] +fn scatter_u16x8_basic(simd: S) { + let indices = u16x8::from_slice(simd, &[7, 6, 5, 4, 3, 2, 1, 0]); + let src = [100, 200, 300, 400, 500, 600, 700, 800]; + let mut dst = [0; 8]; + indices.scatter(&src, &mut dst); + assert_eq!(dst, [800, 700, 600, 500, 400, 300, 200, 100]); +} + +#[simd_test] +fn scatter_u32x4_basic(simd: S) { + let indices = u32x4::from_slice(simd, &[2, 0, 3, 1]); + let src = [10, 20, 30, 40]; + let mut dst = [0; 4]; + indices.scatter(&src, &mut dst); + assert_eq!(dst, [20, 40, 10, 30]); +} + +#[simd_test] +fn scatter_with_duplicate_indices(simd: S) { + // When multiple indices point to the same location, one of them will win + // The behavior is unspecified, but all should be valid values from src + let indices = u8x16::from_slice(simd, &[0, 0, 1, 1, 2, 2, 3, 3, 0, 1, 2, 3, 0, 1, 2, 3]); + let src = [ + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, + ]; + let mut dst = [0; 4]; + indices.scatter(&src, &mut dst); + + assert!([10, 20, 90, 130].contains(&dst[0])); + assert!([30, 40, 100, 140].contains(&dst[1])); + assert!([50, 60, 110, 150].contains(&dst[2])); + assert!([70, 80, 120, 160].contains(&dst[3])); +} + +#[simd_test] +fn scatter_out_of_bounds_clamping(simd: S) { + // Out of bounds indices should be clamped to the last element + let indices = u8x16::from_slice( + simd, + &[0, 1, 2, 100, 200, 255, 3, 4, 0, 50, 99, 1, 2, 3, 4, 150], + ); + let src = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut dst = [0; 5]; + indices.scatter(&src, &mut dst); + + assert!([1, 9].contains(&dst[0])); + assert!([2, 12].contains(&dst[1])); + assert!([3, 13].contains(&dst[2])); + assert!([7, 14].contains(&dst[3])); + assert!([5, 6, 8, 10, 11, 15, 16].contains(&dst[4])); +} + +#[simd_test] +fn scatter_single_element_destination(simd: S) { + // All indices should be clamped to 0, so all writes go to the same location + let indices = u8x16::from_slice( + simd, + &[0, 5, 10, 
255, 100, 50, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + ); + let src = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut dst = [0]; + indices.scatter(&src, &mut dst); + + assert!((1..=16).contains(&dst[0])); +} + +#[simd_test] +fn scatter_u32x4_out_of_bounds(simd: S) { + let indices = u32x4::from_slice(simd, &[0, 1000, u32::MAX, 2]); + let src = [100, 200, 300, 400]; + let mut dst = [0; 3]; + indices.scatter(&src, &mut dst); + + assert_eq!(dst[0], 100); + assert!([200, 300, 400].contains(&dst[2])); +} + +#[simd_test] +#[should_panic(expected = "scatter: destination slice must not be empty")] +fn scatter_empty_destination_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src = [0; 16]; + let mut dst: [i32; 0] = []; + indices.scatter(&src, &mut dst); +} + +#[simd_test] +#[should_panic( + expected = "scatter: source slice must have the same element count as the vector type" +)] +fn scatter_wrong_src_size_panics(simd: S) { + let indices = u8x16::splat(simd, 0); + let src = [1, 2, 3]; // Should be 16 + let mut dst = [0; 10]; + indices.scatter(&src, &mut dst); +} + +// ===== Additional edge case tests ===== + +#[simd_test] +fn gather_scatter_roundtrip(simd: S) { + // Test that gather followed by scatter with the same indices preserves data + let indices = u32x4::from_slice(simd, &[3, 1, 2, 0]); + let original = [100, 200, 300, 400]; + + let gathered = indices.gather(&original); + assert_eq!(gathered, [400, 200, 300, 100]); + + let mut result = [0; 4]; + indices.scatter(&gathered, &mut result); + assert_eq!(result, original); +} + +#[simd_test] +fn gather_u16x16_native_width(simd: S) { + let data: Vec = (0..100).collect(); + let indices = S::u16s::from_slice(simd, &vec![5_u16; S::u16s::N]); + + let mut result = vec![0_u32; S::u16s::N]; + indices.gather_into(&data, &mut result); + assert_eq!(result, vec![5_u32; S::u16s::N]); +} + +#[simd_test] +fn scatter_u32_native_width(simd: S) { + let src = vec![42_u64; S::u32s::N]; + let mut dst = vec![0_u64; 
100]; + + let indices = S::u32s::from_slice(simd, &vec![10_u32; S::u32s::N]); + indices.scatter(&src, &mut dst); + + for (i, item) in dst.iter().enumerate() { + if i == 10 { + assert_eq!(*item, 42); + } else { + assert_eq!(*item, 0); + } + } +} + +#[simd_test] +fn gather_with_large_type(simd: S) { + #[derive(Debug, Clone, Copy, PartialEq)] + struct LargeStruct { + a: u64, + b: u64, + c: u64, + } + + let src = [ + LargeStruct { a: 1, b: 2, c: 3 }, + LargeStruct { a: 4, b: 5, c: 6 }, + LargeStruct { a: 7, b: 8, c: 9 }, + LargeStruct { + a: 10, + b: 11, + c: 12, + }, + ]; + + let indices = u32x4::from_slice(simd, &[3, 0, 2, 1]); + let result = indices.gather(&src); + + assert_eq!( + result[0], + LargeStruct { + a: 10, + b: 11, + c: 12 + } + ); + assert_eq!(result[1], LargeStruct { a: 1, b: 2, c: 3 }); + assert_eq!(result[2], LargeStruct { a: 7, b: 8, c: 9 }); + assert_eq!(result[3], LargeStruct { a: 4, b: 5, c: 6 }); +} + +#[simd_test] +fn scatter_with_large_type(simd: S) { + #[derive(Debug, Clone, Copy, PartialEq)] + struct LargeStruct { + a: u64, + b: u64, + } + + let src = [ + LargeStruct { a: 1, b: 2 }, + LargeStruct { a: 3, b: 4 }, + LargeStruct { a: 5, b: 6 }, + LargeStruct { a: 7, b: 8 }, + ]; + + let indices = u32x4::from_slice(simd, &[2, 0, 3, 1]); + let mut dst = [LargeStruct { a: 0, b: 0 }; 4]; + indices.scatter(&src, &mut dst); + + assert_eq!(dst[0], LargeStruct { a: 3, b: 4 }); + assert_eq!(dst[1], LargeStruct { a: 7, b: 8 }); + assert_eq!(dst[2], LargeStruct { a: 1, b: 2 }); + assert_eq!(dst[3], LargeStruct { a: 5, b: 6 }); +} From fa0a66d33d3f2af367a061252172c4eba5b8b585 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 26 Dec 2025 07:21:29 -0500 Subject: [PATCH 09/10] Remove &*inbounds --- fearless_simd/src/generated/simd_types.rs | 27 ----------------------- fearless_simd_gen/src/mk_simd_types.rs | 3 --- 2 files changed, 30 deletions(-) diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index 
79bd225e..c5ff4928 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -577,7 +577,6 @@ impl SimdGather for u8x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -601,7 +600,6 @@ impl SimdGather for u8x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -631,7 +629,6 @@ impl SimdScatter for u8x16 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -1133,7 +1130,6 @@ impl SimdGather for u16x8 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -1157,7 +1153,6 @@ impl SimdGather for u16x8 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -1187,7 +1182,6 @@ impl SimdScatter for u16x8 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -1713,7 +1707,6 @@ impl SimdGather for u32x4 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -1737,7 +1730,6 @@ impl SimdGather for u32x4 { ((src.len() - 1) as 
Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -1767,7 +1759,6 @@ impl SimdScatter for u32x4 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -2876,7 +2867,6 @@ impl SimdGather for u8x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -2900,7 +2890,6 @@ impl SimdGather for u8x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -2930,7 +2919,6 @@ impl SimdScatter for u8x32 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -3463,7 +3451,6 @@ impl SimdGather for u16x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -3487,7 +3474,6 @@ impl SimdGather for u16x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -3517,7 +3503,6 @@ impl SimdScatter for u16x16 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -4064,7 
+4049,6 @@ impl SimdGather for u32x8 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -4088,7 +4072,6 @@ impl SimdGather for u32x8 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -4118,7 +4101,6 @@ impl SimdScatter for u32x8 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -5249,7 +5231,6 @@ impl SimdGather for u8x64 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -5273,7 +5254,6 @@ impl SimdGather for u8x64 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -5303,7 +5283,6 @@ impl SimdScatter for u8x64 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -5818,7 +5797,6 @@ impl SimdGather for u16x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -5842,7 +5820,6 @@ impl SimdGather for u16x32 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = 
*src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -5872,7 +5849,6 @@ impl SimdScatter for u16x32 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); @@ -6411,7 +6387,6 @@ impl SimdGather for u32x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { *src.get_unchecked(*inbounds.get_unchecked(i) as usize) }) } #[inline(always)] @@ -6435,7 +6410,6 @@ impl SimdGather for u32x16 { ((src.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(i) = *src.get_unchecked(*inbounds.get_unchecked(i) as usize); @@ -6465,7 +6439,6 @@ impl SimdScatter for u32x16 { ((dst.len() - 1) as Self::Element).simd_into(self.simd), ) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { *dst.get_unchecked_mut(*inbounds.get_unchecked(i) as usize) = *src.get_unchecked(i); diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index bf6bf699..412f03d0 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -165,7 +165,6 @@ pub(crate) fn mk_simd_types() -> TokenStream { self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) }; - let inbounds = &*inbounds; core::array::from_fn(|i| unsafe { // Safety: All elements of `inbounds` are in [0, src.len()). 0 is a valid index, because we // asserted that `src` is not empty. Therefore, the index into `src` is valid. `i` will be @@ -188,7 +187,6 @@ pub(crate) fn mk_simd_types() -> TokenStream { self.simd.#min_method(self, ((src.len() - 1) as Self::Element).simd_into(self.simd)) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { // Safety: All elements of `inbounds` are in [0, src.len()). 
0 is a valid index, because @@ -216,7 +214,6 @@ pub(crate) fn mk_simd_types() -> TokenStream { self.simd.#min_method(self, ((dst.len() - 1) as Self::Element).simd_into(self.simd)) }; - let inbounds = &*inbounds; for i in 0..Self::N { unsafe { // Safety: All elements of `inbounds` are in [0, dst.len()). 0 is a valid index, because From e0927a037eebd73c785af31657caeb1d0eb68d65 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 26 Dec 2025 07:36:52 -0500 Subject: [PATCH 10/10] Run `#[should_panic]` tests with fallback impl --- fearless_simd_dev_macros/src/lib.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fearless_simd_dev_macros/src/lib.rs b/fearless_simd_dev_macros/src/lib.rs index 5ac987bc..3aa68cb3 100644 --- a/fearless_simd_dev_macros/src/lib.rs +++ b/fearless_simd_dev_macros/src/lib.rs @@ -29,6 +29,20 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { .filter(|attr| !attr.path().is_ident("simd_test")) .collect(); + // If this is a `#[should_panic]` test, run it with the fallback so it actually panics + let should_panic_attr = input_fn + .attrs + .iter() + .find(|attr| attr.path().is_ident("should_panic")); + let panic_else = if should_panic_attr.is_some() { + quote! { + let fallback = fearless_simd::Fallback::new(); + #input_fn_name(fallback); + } + } else { + quote! {} + }; + let fallback_snippet = quote! { #(#test_attrs)* #[test] @@ -47,6 +61,9 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { // target features aren't supported. This is not ideal, since it may mislead you into thinking tests have passed // when they haven't even been run, but some CI runners don't support all target features and we don't want failures // as a result of that. + // + // However, for #[should_panic] tests, we need to panic if features aren't available to avoid + // "test did not panic as expected" failures. let neon_snippet = quote! 
{ #[cfg(target_arch = "aarch64")] @@ -56,6 +73,8 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { if std::arch::is_aarch64_feature_detected!("neon") { let neon = unsafe { fearless_simd::aarch64::Neon::new_unchecked() }; #input_fn_name(neon); + } else { + #panic_else } } }; @@ -68,6 +87,8 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { if std::arch::is_x86_feature_detected!("sse4.2") { let sse4 = unsafe { fearless_simd::x86::Sse4_2::new_unchecked() }; #input_fn_name(sse4); + } else { + #panic_else } } }; @@ -82,6 +103,8 @@ pub fn simd_test(_: TokenStream, item: TokenStream) -> TokenStream { { let avx2 = unsafe { fearless_simd::x86::Avx2::new_unchecked() }; #input_fn_name(avx2); + } else { + #panic_else } } };