From 5362a45f8de1f39f272119a7eb4470268e530658 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 14 Sep 2025 13:31:21 +0200 Subject: [PATCH 01/17] Add u64 SimdElement --- fearless_simd/src/traits.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fearless_simd/src/traits.rs b/fearless_simd/src/traits.rs index 055526af..84087eab 100644 --- a/fearless_simd/src/traits.rs +++ b/fearless_simd/src/traits.rs @@ -119,6 +119,10 @@ impl SimdElement for i64 { type Mask = i64; } +impl SimdElement for u64 { + type Mask = u64; +} + /// Construction of integer vectors from floats by truncation pub trait SimdCvtTruncate { fn truncate_from(x: T) -> Self; From 8c4331c7162930861e4bb9b2a3de4493b1753b2c Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 14 Sep 2025 13:33:12 +0200 Subject: [PATCH 02/17] Types: Add Int/Unsigned for 128, 256, 512-bits --- fearless_simd_gen/src/types.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fearless_simd_gen/src/types.rs b/fearless_simd_gen/src/types.rs index ed0639a0..e75ba06f 100644 --- a/fearless_simd_gen/src/types.rs +++ b/fearless_simd_gen/src/types.rs @@ -116,6 +116,8 @@ pub const SIMD_TYPES: &[VecType] = &[ VecType::new(ScalarType::Mask, 32, 4), VecType::new(ScalarType::Float, 64, 2), VecType::new(ScalarType::Mask, 64, 2), + VecType::new(ScalarType::Int, 64, 2), + VecType::new(ScalarType::Unsigned, 64, 2), // 256 bit types VecType::new(ScalarType::Float, 32, 8), VecType::new(ScalarType::Int, 8, 32), @@ -129,6 +131,8 @@ pub const SIMD_TYPES: &[VecType] = &[ VecType::new(ScalarType::Mask, 32, 8), VecType::new(ScalarType::Float, 64, 4), VecType::new(ScalarType::Mask, 64, 4), + VecType::new(ScalarType::Int, 64, 4), + VecType::new(ScalarType::Unsigned, 64, 4), // 512 bit types VecType::new(ScalarType::Float, 32, 16), VecType::new(ScalarType::Int, 8, 64), @@ -142,6 +146,8 @@ pub const SIMD_TYPES: &[VecType] = &[ VecType::new(ScalarType::Mask, 32, 16), VecType::new(ScalarType::Float, 64, 8), 
VecType::new(ScalarType::Mask, 64, 8), + VecType::new(ScalarType::Int, 64, 8), + VecType::new(ScalarType::Unsigned, 64, 8), ]; pub fn type_imports() -> TokenStream { From a8742db77506b40f135aa5c1112272980c96be3f Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 14 Sep 2025 14:00:53 +0200 Subject: [PATCH 03/17] fixup! Add u64 SimdElement --- fearless_simd/src/traits.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fearless_simd/src/traits.rs b/fearless_simd/src/traits.rs index 84087eab..0e284f54 100644 --- a/fearless_simd/src/traits.rs +++ b/fearless_simd/src/traits.rs @@ -120,7 +120,7 @@ impl SimdElement for i64 { } impl SimdElement for u64 { - type Mask = u64; + type Mask = i64; } /// Construction of integer vectors from floats by truncation From c77c0948f462e77e1466c1f60737a6d010dcc4c4 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 14 Sep 2025 15:13:05 +0200 Subject: [PATCH 04/17] Groundwork for {i,u}64s --- fearless_simd_gen/src/mk_avx2.rs | 3 +++ fearless_simd_gen/src/mk_fallback.rs | 3 +++ fearless_simd_gen/src/mk_simd_trait.rs | 7 +++++++ fearless_simd_gen/src/mk_sse4_2.rs | 15 +++++++++++++++ fearless_simd_gen/src/mk_wasm.rs | 8 ++++++++ fearless_simd_tests/tests/harness/mod.rs | 7 +++++++ 6 files changed, 43 insertions(+) diff --git a/fearless_simd_gen/src/mk_avx2.rs b/fearless_simd_gen/src/mk_avx2.rs index 9d501fd9..bdf81369 100644 --- a/fearless_simd_gen/src/mk_avx2.rs +++ b/fearless_simd_gen/src/mk_avx2.rs @@ -112,9 +112,12 @@ fn mk_simd_impl() -> TokenStream { type i16s = i16x8; type u32s = u32x4; type i32s = i32x4; + type u64s = u64x4; + type i64s = i64x4; type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; + type mask64s = mask64x4; #[inline(always)] fn level(self) -> Level { Level::#level_tok(self) diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index a69eca0f..1f243550 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ 
b/fearless_simd_gen/src/mk_fallback.rs @@ -402,9 +402,12 @@ fn mk_simd_impl() -> TokenStream { type i16s = i16x8; type u32s = u32x4; type i32s = i32x4; + type u64s = u64x4; + type i64s = i64x4; type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; + type mask64s = mask64x4; #[inline(always)] fn level(self) -> Level { Level::#level_tok(self) diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index 0a784683..6071bcf7 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -44,10 +44,17 @@ pub fn mk_simd_trait() -> TokenStream { type u32s: SimdInt, Mask = Self::mask32s> + SimdCvtTruncate; type i32s: SimdInt, Mask = Self::mask32s, Bytes = ::Bytes> + SimdCvtTruncate + core::ops::Neg; + type u64s: SimdInt, Mask = Self::mask64s>; // + SimdCvtTruncate; + type i64s: SimdInt, Mask = Self::mask64s, Bytes = ::Bytes>; // + SimdCvtTruncate; + type mask8s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; + type mask16s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; + type mask32s: SimdMask, Bytes = ::Bytes> type mask8s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; type mask16s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; type mask32s: SimdMask, Bytes = ::Bytes> + Select + Select + Select + Select; + type mask64s: SimdMask, Bytes = ::Bytes> + + Select + Select + Select; // + Select fn level(self) -> Level; /// Call function with CPU features enabled. 
diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 00ed79e4..d363dbc1 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -110,9 +110,12 @@ fn mk_simd_impl() -> TokenStream { type i16s = i16x8; type u32s = u32x4; type i32s = i32x4; + type u64s = u64x2; + type i64s = i64x2; type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; + type mask64s = mask64x2; #[inline(always)] fn level(self) -> Level { Level::#level_tok(self) @@ -265,6 +268,7 @@ pub(crate) fn handle_compare( 8 => quote! { 0x80u8 }, 16 => quote! { 0x8000u16 }, 32 => quote! { 0x80000000u32 }, + 64 => quote! { 0x8000000000000000u64 }, _ => unimplemented!(), }; let gt = @@ -280,6 +284,17 @@ pub(crate) fn handle_compare( let a_signed = _mm_xor_si128(a.into(), sign_bit); let b_signed = _mm_xor_si128(b.into(), sign_bit); + #gt(#args) + } + } else if vec_ty.scalar == ScalarType::Int { + let gt = simple_intrinsic("cmpgt", vec_ty.scalar, vec_ty.scalar_bits, ty_bits); + let args = if method == "simd_lt" { + quote! { b.into(), a.into() } + } else { + quote! { a.into(), b.into() } + }; + + quote! { #gt(#args) } } else { diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 5004ed60..db977ba0 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -386,6 +386,13 @@ fn mk_simd_impl(level: Level) -> TokenStream { quote! { 2, 3, 6, 7 }, quote! { u32x4_shuffle }, ), + 64 => ( + quote! { 0, 2 }, + quote! { 1, 3 }, + quote! { 0, 1 }, + quote! { 2, 3 }, + quote! { u64x2_shuffle }, + ), _ => panic!("unsupported scalar_bits"), }; @@ -455,6 +462,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { quote! { 2, 6, 3, 7 }, quote! { u32x4_shuffle }, ), + 64 => (quote! { 0, 2 }, quote! { 1, 3 }, quote! 
{ u64x2_shuffle }), _ => panic!("unsupported scalar_bits"), }; diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index adb43ffa..ba7ec940 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -1352,6 +1352,13 @@ fn select_native_width_vectors(simd: S) { let b_i16 = S::i16s::from_slice(simd, &vec![-50i16; S::i16s::N]); let result_i16 = mask_u16.select(a_i16, b_i16); assert_eq!(result_i16.as_slice(), vec![50i16; S::i16s::N]); + + // Test with native u64 vectors + let a_i16 = S::i64s::from_slice(simd, &vec![50i64; S::i64s::N]); + let b_i16 = S::i64s::from_slice(simd, &vec![-50i64; S::i64s::N]); + let mask_u64 = S::mask64s::from_slice(simd, &vec![-1i64; S::mask64s::N]); + let result_i16 = mask_u64.select(a_i16, b_i16); + assert_eq!(result_i16.as_slice(), vec![50i64; S::i64s::N]); } #[simd_test] From 396e174afec2c0b4634d755d9f951981a3fff872 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 21 Sep 2025 17:20:06 +0200 Subject: [PATCH 05/17] Add unzip + tests --- fearless_simd_gen/src/mk_sse4_2.rs | 10 +++++ fearless_simd_tests/tests/harness/mod.rs | 57 ++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index d363dbc1..0e859b85 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -611,6 +611,16 @@ pub(crate) fn handle_unzip( quote! { unsafe { #intrinsic::<#mask>(a.into(), b.into()).simd_into(self) } } } else { match vec_ty.scalar_bits { + 64 => { + let op = if select_even { "lo" } else { "hi" }; + let intrinsic = format_ident!("_mm_unpack{op}_epi64"); + + quote! 
{ + unsafe { + #intrinsic(a.into(), b.into()).simd_into(self) + } + } + } 32 => { let op = if select_even { "lo" } else { "hi" }; diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index ba7ec940..21d3ecc6 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -703,6 +703,20 @@ fn zip_high_u32x4(simd: S) { assert_eq!(simd.zip_high_u32x4(a, b).val, [2, 6, 3, 7]); } +#[simd_test] +fn zip_low_u64x2(simd: S) { + let a = u64x2::from_slice(simd, &[0, 1]); + let b = u64x2::from_slice(simd, &[4, 5]); + assert_eq!(simd.zip_low_u64x2(a, b).val, [0, 4]); +} + +#[simd_test] +fn zip_high_u64x2(simd: S) { + let a = u64x2::from_slice(simd, &[0, 1]); + let b = u64x2::from_slice(simd, &[4, 5]); + assert_eq!(simd.zip_high_u64x2(a, b).val, [1, 5]); +} + #[simd_test] fn unzip_low_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[1.0, 2.0, 3.0, 4.0]); @@ -871,6 +885,34 @@ fn unzip_high_u32x4(simd: S) { assert_eq!(simd.unzip_high_u32x4(a, b).val, [2, 4, 6, 8]); } +#[simd_test] +fn unzip_low_i64x2(simd: S) { + let a = i64x2::from_slice(simd, &[1, 2]); + let b = i64x2::from_slice(simd, &[3, 4]); + assert_eq!(simd.unzip_low_i64x2(a, b).val, [1, 3]); +} + +#[simd_test] +fn unzip_high_i64x2(simd: S) { + let a = i64x2::from_slice(simd, &[1, 2]); + let b = i64x2::from_slice(simd, &[3, 4]); + assert_eq!(simd.unzip_high_i64x2(a, b).val, [2, 4]); +} + +#[simd_test] +fn unzip_low_u64x2(simd: S) { + let a = u64x2::from_slice(simd, &[1, 2]); + let b = u64x2::from_slice(simd, &[3, 4]); + assert_eq!(simd.unzip_low_u64x2(a, b).val, [1, 3]); +} + +#[simd_test] +fn unzip_high_u64x2(simd: S) { + let a = u64x2::from_slice(simd, &[1, 2]); + let b = u64x2::from_slice(simd, &[3, 4]); + assert_eq!(simd.unzip_high_u64x2(a, b).val, [2, 4]); +} + #[simd_test] fn unzip_low_f64x2(simd: S) { let a = f64x2::from_slice(simd, &[1.0, 2.0]); @@ -1354,11 +1396,18 @@ fn select_native_width_vectors(simd: S) { 
assert_eq!(result_i16.as_slice(), vec![50i16; S::i16s::N]); // Test with native u64 vectors - let a_i16 = S::i64s::from_slice(simd, &vec![50i64; S::i64s::N]); - let b_i16 = S::i64s::from_slice(simd, &vec![-50i64; S::i64s::N]); + let a_i64 = S::i64s::from_slice(simd, &vec![50i64; S::i64s::N]); + let b_i64 = S::i64s::from_slice(simd, &vec![-50i64; S::i64s::N]); + let mask_i64 = S::mask64s::from_slice(simd, &vec![-1i64; S::mask64s::N]); + let result_i64 = mask_i64.select(a_i64, b_i64); + assert_eq!(result_i64.as_slice(), vec![50i64; S::i64s::N]); + + // Test with native u64 vectors + let a_u64 = S::u64s::from_slice(simd, &vec![100u64; S::u64s::N]); + let b_u64 = S::u64s::from_slice(simd, &vec![200u64; S::u64s::N]); let mask_u64 = S::mask64s::from_slice(simd, &vec![-1i64; S::mask64s::N]); - let result_i16 = mask_u64.select(a_i16, b_i16); - assert_eq!(result_i16.as_slice(), vec![50i64; S::i64s::N]); + let result_u64 = mask_u64.select(a_u64, b_u64); + assert_eq!(result_u64.as_slice(), vec![100u64; S::u64s::N]); } #[simd_test] From eae5a4b5e783b819fc7af45ddd14791360ef0d90 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 21 Sep 2025 20:00:50 +0200 Subject: [PATCH 06/17] 128bit types for neon wasm and avx2 --- fearless_simd_gen/src/mk_avx2.rs | 6 +++--- fearless_simd_gen/src/mk_neon.rs | 3 +++ fearless_simd_gen/src/mk_wasm.rs | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fearless_simd_gen/src/mk_avx2.rs b/fearless_simd_gen/src/mk_avx2.rs index bdf81369..470eb009 100644 --- a/fearless_simd_gen/src/mk_avx2.rs +++ b/fearless_simd_gen/src/mk_avx2.rs @@ -112,12 +112,12 @@ fn mk_simd_impl() -> TokenStream { type i16s = i16x8; type u32s = u32x4; type i32s = i32x4; - type u64s = u64x4; - type i64s = i64x4; + type u64s = u64x2; + type i64s = i64x2; type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; - type mask64s = mask64x4; + type mask64s = mask64x2; #[inline(always)] fn level(self) -> Level { Level::#level_tok(self) diff 
--git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index 48ca55e6..d6a415da 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -409,9 +409,12 @@ fn mk_simd_impl(level: Level) -> TokenStream { type i16s = i16x8; type u32s = u32x4; type i32s = i32x4; + type u64s = u64x2; + type i64s = i64x2; type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; + type mask64s = mask64x2; #[inline(always)] fn level(self) -> Level { Level::#level_tok(self) diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index db977ba0..454a1ebc 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -534,9 +534,12 @@ fn mk_simd_impl(level: Level) -> TokenStream { type i16s = i16x8; type u32s = u32x4; type i32s = i32x4; + type u64s = u64x2; + type i64s = i64x2; type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; + type mask64s = mask64x2; #[inline(always)] fn level(self) -> Level { From fb57d66c5c627a9c0264a3ee5d0e750d35bb2448 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Tue, 23 Sep 2025 20:49:09 +0200 Subject: [PATCH 07/17] wasm: wip --- fearless_simd_gen/src/mk_wasm.rs | 69 ++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 454a1ebc..0de184f6 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -12,6 +12,7 @@ use quote::{format_ident, quote}; use crate::generic::scalar_binary; use crate::ops::valid_reinterpret; +use crate::types::VecType; use crate::{ arch::{Arch, wasm::Wasm}, generic::{generic_combine, generic_op, generic_split}, @@ -64,6 +65,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { #[inline(always)] fn #method_ident(#args) -> #ret_ty }; + let m = match sig { OpSig::Splat => { let expr = Wasm.expr(method, vec_ty, &[quote! 
{ val }]); @@ -118,6 +120,45 @@ fn mk_simd_impl(level: Level) -> TokenStream { OpSig::Binary => { let args = [quote! { a.into() }, quote! { b.into() }]; match method { + "max" | "min" if vec_ty.scalar_bits == 64 && vec_ty.len == 2 => { + let is_max = method == "max"; + + let xor_for_unsigned = if vec_ty.scalar == ScalarType::Unsigned { + quote! { + let sign_bit = i64x2_splat(0x8000_0000_0000_0000u64 as i64); + let a_signed = v128_xor(a.into(), sign_bit); + let b_signed = v128_xor(b.into(), sign_bit); + } + } else { + quote! { + let a_signed = a.into(); + let b_signed = b.into(); + } + }; + + let body = if is_max { + quote! { + let mask = i64x2_gt(a_signed, b_signed); + let a_masked = v128_and(mask, a.into()); + let b_masked = v128_andnot(mask, b.into()); + v128_or(a_masked, b_masked) + } + } else { + quote! { + let mask = i64x2_gt(a_signed, b_signed); + let a_masked = v128_andnot(mask, a.into()); + let b_masked = v128_and(mask, b.into()); + v128_or(a_masked, b_masked) + } + }; + + quote! { + #method_sig { + #xor_for_unsigned + #body.simd_into(self) + } + } + } "mul" if vec_ty.scalar_bits == 8 && vec_ty.len == 16 => { let (extmul_low, extmul_high) = match vec_ty.scalar { ScalarType::Unsigned => ( @@ -183,9 +224,31 @@ fn mk_simd_impl(level: Level) -> TokenStream { OpSig::Compare => { let args = [quote! { a.into() }, quote! { b.into() }]; let expr = Wasm.expr(method, vec_ty, &args); - quote! { - #method_sig { - #expr.simd_into(self) + + let missing_op = ["lt", "gt", "le", "ge"] + .iter() + .find(|&op| method.ends_with(op)); + + if vec_ty.scalar_bits == 64 + && vec_ty.scalar == ScalarType::Unsigned + && missing_op.is_some() + { + let op = missing_op.unwrap(); + let wasm_ident = format_ident!("i64x2_{}", op); + quote! { + #method_sig { + let sign_bit = i64x2_splat(0x8000_0000_0000_0000u64 as i64); + let a_signed = v128_xor(a.into(), sign_bit); + let b_signed = v128_xor(b.into(), sign_bit); + + #wasm_ident(a_signed, b_signed).simd_into(self) + } + } + } else { + quote! 
{ + #method_sig { + #expr.simd_into(self) + } } } } From 72271b1a08d03054fce4b563b294336595b5f6c1 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Tue, 23 Sep 2025 21:26:55 +0200 Subject: [PATCH 08/17] fixup rebase mistake --- fearless_simd_gen/src/mk_fallback.rs | 2 +- fearless_simd_gen/src/mk_simd_trait.rs | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index 1f243550..d5ba2ccd 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -407,7 +407,7 @@ fn mk_simd_impl() -> TokenStream { type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; - type mask64s = mask64x4; + type mask64s = mask64x2; #[inline(always)] fn level(self) -> Level { Level::#level_tok(self) diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index 6071bcf7..d0a40dd4 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -45,16 +45,11 @@ pub fn mk_simd_trait() -> TokenStream { type i32s: SimdInt, Mask = Self::mask32s, Bytes = ::Bytes> + SimdCvtTruncate + core::ops::Neg; type u64s: SimdInt, Mask = Self::mask64s>; // + SimdCvtTruncate; - type i64s: SimdInt, Mask = Self::mask64s, Bytes = ::Bytes>; // + SimdCvtTruncate; + type i64s: SimdInt, Mask = Self::mask64s, Bytes = ::Bytes>; // + SimdCvtTruncate; type mask8s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; type mask16s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; - type mask32s: SimdMask, Bytes = ::Bytes> - type mask8s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; - type mask16s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; - type mask32s: SimdMask, Bytes = ::Bytes> - + Select + Select + Select + Select; - type mask64s: SimdMask, Bytes = ::Bytes> - + Select + Select + Select; // + Select + type mask32s: SimdMask, Bytes = ::Bytes> + Select + Select + 
Select + Select; + type mask64s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; // + Select fn level(self) -> Level; /// Call function with CPU features enabled. From ca0a310b8101db94239cbafad576c82dae2481e5 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Tue, 23 Sep 2025 21:46:53 +0200 Subject: [PATCH 09/17] more rebase fixups --- fearless_simd_gen/src/mk_fallback.rs | 4 ++-- fearless_simd_gen/src/mk_simd_trait.rs | 2 +- fearless_simd_gen/src/mk_wasm.rs | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index d5ba2ccd..40d796f6 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -402,8 +402,8 @@ fn mk_simd_impl() -> TokenStream { type i16s = i16x8; type u32s = u32x4; type i32s = i32x4; - type u64s = u64x4; - type i64s = i64x4; + type u64s = u64x2; + type i64s = i64x2; type mask8s = mask8x16; type mask16s = mask16x8; type mask32s = mask32x4; diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index d0a40dd4..1bcc78ba 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -45,7 +45,7 @@ pub fn mk_simd_trait() -> TokenStream { type i32s: SimdInt, Mask = Self::mask32s, Bytes = ::Bytes> + SimdCvtTruncate + core::ops::Neg; type u64s: SimdInt, Mask = Self::mask64s>; // + SimdCvtTruncate; - type i64s: SimdInt, Mask = Self::mask64s, Bytes = ::Bytes>; // + SimdCvtTruncate; + type i64s: SimdInt, Mask = Self::mask64s, Bytes = ::Bytes> + core::ops::Neg; // + SimdCvtTruncate; type mask8s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; type mask16s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; type mask32s: SimdMask, Bytes = ::Bytes> + Select + Select + Select + Select; diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 0de184f6..9c1b560c 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ 
b/fearless_simd_gen/src/mk_wasm.rs @@ -12,7 +12,6 @@ use quote::{format_ident, quote}; use crate::generic::scalar_binary; use crate::ops::valid_reinterpret; -use crate::types::VecType; use crate::{ arch::{Arch, wasm::Wasm}, generic::{generic_combine, generic_op, generic_split}, From 42396b0739c9a846f647c4afc61f23a241febb7f Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Thu, 25 Sep 2025 20:29:18 +0200 Subject: [PATCH 10/17] u64 compare tests --- fearless_simd_tests/tests/harness/mod.rs | 48 ++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 21d3ecc6..a6adc814 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -1348,6 +1348,54 @@ fn simd_ge_i8x16(simd: S) { ); } +#[simd_test] +fn simd_ge_i64x2(simd: S) { + let vals = i64x2::from_slice(simd, &[0, -45]); + let mask = vals.simd_ge(i64x2::splat(simd, -1)); + + assert_eq!(mask.val, [-1, 0]); +} + +#[simd_test] +fn simd_le_i64x2(simd: S) { + let vals = i64x2::from_slice(simd, &[0, -45]); + let mask = vals.simd_le(i64x2::splat(simd, -1)); + + assert_eq!(mask.val, [0, -1]); +} + +#[simd_test] +fn simd_ge_u64x2(simd: S) { + let vals = u64x2::from_slice(simd, &[0, 45]); + let mask = vals.simd_ge(u64x2::splat(simd, 45)); + + assert_eq!(mask.val, [0, -1]); +} + +#[simd_test] +fn simd_le_u64x2(simd: S) { + let vals = u64x2::from_slice(simd, &[0, 45]); + let mask = vals.simd_le(u64x2::splat(simd, 45)); + + assert_eq!(mask.val, [-1, -1]); +} + +#[simd_test] +fn simd_lt_u64x2(simd: S) { + let vals = u64x2::from_slice(simd, &[0, 45]); + let mask = vals.simd_lt(u64x2::splat(simd, 45)); + + assert_eq!(mask.val, [-1, 0]); +} + +#[simd_test] +fn simd_gt_u64x2(simd: S) { + let vals = u64x2::from_slice(simd, &[0, 45]); + let mask = vals.simd_gt(u64x2::splat(simd, 45)); + + assert_eq!(mask.val, [0, 0]); +} + #[simd_test] fn select_native_width_vectors(simd: S) { // Test 
with native f32 vectors From 40bbad7fec69e49ec31bb3428e1f90eee1617510 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Thu, 25 Sep 2025 20:42:49 +0200 Subject: [PATCH 11/17] SSE4.2 weirdness --- fearless_simd_gen/src/mk_sse4_2.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 0e859b85..05caf4b6 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -286,8 +286,9 @@ pub(crate) fn handle_compare( #gt(#args) } - } else if vec_ty.scalar == ScalarType::Int { + } else if vec_ty.scalar == ScalarType::Int && vec_ty.scalar_bits == 64 && vec_ty.len == 2 { let gt = simple_intrinsic("cmpgt", vec_ty.scalar, vec_ty.scalar_bits, ty_bits); + // SSE4.2 only has signed GT for i64 let args = if method == "simd_lt" { quote! { b.into(), a.into() } } else { From 60b52c1348ab70233faacf69947d5b328dc07083 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Thu, 25 Sep 2025 21:23:48 +0200 Subject: [PATCH 12/17] WIP: Is eq bogus or is my test wrong? 
--- fearless_simd_tests/tests/harness/mod.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index a6adc814..07a7cc61 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -1396,6 +1396,22 @@ fn simd_gt_u64x2(simd: S) { assert_eq!(mask.val, [0, 0]); } +#[simd_test] +fn simd_eq_u64x2(simd: S) { + let vals = u64x2::from_slice(simd, &[45, 45]); + let mask = vals.simd_eq(u64x2::splat(simd, 45)); + + assert_eq!(mask.val, [-1, -1]); +} + +#[simd_test] +fn simd_eq_i64x2(simd: S) { + let vals = i64x2::from_slice(simd, &[-3, -3]); + let mask = vals.simd_eq(i64x2::splat(simd, -3)); + + assert_eq!(mask.val, [-1, -1]); +} + #[simd_test] fn select_native_width_vectors(simd: S) { // Test with native f32 vectors From 18ae90ac9bf47bd6e4113c790e5b4bb4d32e54b4 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sat, 27 Sep 2025 14:45:06 +0200 Subject: [PATCH 13/17] More tests and fixes --- fearless_simd_gen/src/mk_sse4_2.rs | 24 +++++++++++++++----- fearless_simd_tests/tests/harness/mod.rs | 28 +++++++++++++++++++----- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 05caf4b6..3b36c4ac 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -261,6 +261,10 @@ pub(crate) fn handle_compare( let max_min_expr = arch.expr(max_min, vec_ty, &args); quote! { #eq_intrinsic(#max_min_expr, a.into()) } + } else if matches!(method, "simd_eq") && vec_ty.scalar_bits == 64 { + let eq = + simple_sign_unaware_intrinsic("cmpeq", vec_ty.scalar, vec_ty.scalar_bits, ty_bits); + quote! { #eq(a.into(), b.into()) } } else if vec_ty.scalar == ScalarType::Unsigned { // SSE4.2 only has signed GT/LT, but not unsigned. 
let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, ty_bits); @@ -286,8 +290,14 @@ pub(crate) fn handle_compare( #gt(#args) } - } else if vec_ty.scalar == ScalarType::Int && vec_ty.scalar_bits == 64 && vec_ty.len == 2 { - let gt = simple_intrinsic("cmpgt", vec_ty.scalar, vec_ty.scalar_bits, ty_bits); + } else if vec_ty.scalar_bits == 64 { + let intrinsic_name = if matches!(method, "simd_eq") { + "cmpeq" + } else { + "cmpgt" + }; + + let cmp = simple_intrinsic(intrinsic_name, vec_ty.scalar, vec_ty.scalar_bits, ty_bits); // SSE4.2 only has signed GT for i64 let args = if method == "simd_lt" { quote! { b.into(), a.into() } @@ -295,13 +305,17 @@ pub(crate) fn handle_compare( quote! { a.into(), b.into() } }; - quote! { - #gt(#args) - } + let res = quote! { + #cmp(#args) + }; + + let str = res.to_string(); + res } else { arch.expr(method, vec_ty, &args) } } else { + // Floating point comparison arch.expr(method, vec_ty, &args) }; diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 07a7cc61..bb881893 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -1357,17 +1357,17 @@ fn simd_ge_i64x2(simd: S) { } #[simd_test] -fn simd_le_i64x2(simd: S) { - let vals = i64x2::from_slice(simd, &[0, -45]); - let mask = vals.simd_le(i64x2::splat(simd, -1)); +fn simd_ge_u64x2(simd: S) { + let vals = u64x2::from_slice(simd, &[0, 45]); + let mask = vals.simd_ge(u64x2::splat(simd, 45)); assert_eq!(mask.val, [0, -1]); } #[simd_test] -fn simd_ge_u64x2(simd: S) { - let vals = u64x2::from_slice(simd, &[0, 45]); - let mask = vals.simd_ge(u64x2::splat(simd, 45)); +fn simd_le_i64x2(simd: S) { + let vals = i64x2::from_slice(simd, &[0, -45]); + let mask = vals.simd_le(i64x2::splat(simd, -1)); assert_eq!(mask.val, [0, -1]); } @@ -1388,6 +1388,14 @@ fn simd_lt_u64x2(simd: S) { assert_eq!(mask.val, [-1, 0]); } +#[simd_test] +fn simd_lt_i64x2(simd: S) { + let vals = i64x2::from_slice(simd, &[0, 
-45]); + let mask = vals.simd_lt(i64x2::splat(simd, 0)); + + assert_eq!(mask.val, [0, -1]); +} + #[simd_test] fn simd_gt_u64x2(simd: S) { let vals = u64x2::from_slice(simd, &[0, 45]); @@ -1396,6 +1404,14 @@ fn simd_gt_u64x2(simd: S) { assert_eq!(mask.val, [0, 0]); } +#[simd_test] +fn simd_gt_i64x2(simd: S) { + let vals = i64x2::from_slice(simd, &[0, 45]); + let mask = vals.simd_gt(i64x2::splat(simd, 44)); + + assert_eq!(mask.val, [0, -1]); +} + #[simd_test] fn simd_eq_u64x2(simd: S) { let vals = u64x2::from_slice(simd, &[45, 45]); From db74f859b70b35044f36f7eb34e31e24de4565c9 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 28 Sep 2025 11:25:37 +0200 Subject: [PATCH 14/17] more tests --- fearless_simd_tests/tests/harness/mod.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index bb881893..3f3c5150 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -986,6 +986,12 @@ fn shr_u32x4(simd: S) { assert_eq!((a >> 8).val, [16777215, 8388608, 256, 1]); } +#[simd_test] +fn shr_u64x2(simd: S) { + let a = u64x2::from_slice(simd, &[u64::MAX, 2147483648u64]); + assert_eq!((a >> 8).val, [u64::MAX >> 8, 8388608]); +} + #[simd_test] fn shrv_u32x4(simd: S) { let a = u32x4::from_slice(simd, &[u32::MAX, 2147483648, 65536, 256]); @@ -1011,6 +1017,12 @@ fn shl_u32x4(simd: S) { assert_eq!((a << 4).val, [0xFFFFFFF0, 0xFFFF0, 0xFF0, 0]); } +#[simd_test] +fn shl_u64x2(simd: S) { + let a = u64x2::from_slice(simd, &[0xFFFFFFFFFFFFFFFu64, 0]); + assert_eq!((a << 4).val, [0xFFFFFFFFFFFFFFF0u64, 0]); +} + #[simd_test] fn select_f32x4(simd: S) { let mask = mask32x4::from_slice(simd, &[-1, 0, -1, 0]); From 744b04cea9249b452caae7cb28630f6bc0f6425e Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 28 Sep 2025 11:33:29 +0200 Subject: [PATCH 15/17] more tests --- fearless_simd_tests/tests/harness/mod.rs | 17 +++++++++++++++++ 1 
file changed, 17 insertions(+) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 3f3c5150..77ee6309 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -1156,6 +1156,23 @@ fn select_mask32x4(simd: S) { assert_eq!(result.val, [-1, 0, 0, -1]); } +#[simd_test] +fn select_u64x2(simd: S) { + let mask = mask64x2::from_slice(simd, &[0, -1]); + let b = u64x2::from_slice(simd, &[100000, 200000]); + let c = u64x2::from_slice(simd, &[1000, 2000]); + assert_eq!(mask.select(b, c).val, [1000, 200000]); +} + +#[simd_test] +fn select_mask64x2(simd: S) { + let mask = mask64x2::from_slice(simd, &[-1, 0]); + let b = mask64x2::from_slice(simd, &[-1, 42]); + let c = mask64x2::from_slice(simd, &[100, -1]); + let result: mask64x2<_> = mask.select(b, c); + assert_eq!(result.val, [-1, -1]); +} + #[simd_test] fn widen_u8x16(simd: S) { let a = u8x16::from_slice( From a65e49f8c1ca96be5f16ef62222702f9301e1836 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 28 Sep 2025 14:10:50 +0200 Subject: [PATCH 16/17] remove debug code --- fearless_simd_gen/src/mk_sse4_2.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 3b36c4ac..26eadf46 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -305,12 +305,9 @@ pub(crate) fn handle_compare( quote! { a.into(), b.into() } }; - let res = quote! { + quote! 
{ #cmp(#args) - }; - - let str = res.to_string(); - res + } } else { arch.expr(method, vec_ty, &args) } From e9dbc7245749d5c793d8800a19b2a3772ec6e0e7 Mon Sep 17 00:00:00 2001 From: Daniel Buch Hansen Date: Sun, 28 Sep 2025 14:15:10 +0200 Subject: [PATCH 17/17] fixup c/p comment error --- fearless_simd_tests/tests/harness/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 77ee6309..6fd59f3b 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -1504,7 +1504,7 @@ fn select_native_width_vectors(simd: S) { let result_i16 = mask_u16.select(a_i16, b_i16); assert_eq!(result_i16.as_slice(), vec![50i16; S::i16s::N]); - // Test with native u64 vectors + // Test with native i64 vectors let a_i64 = S::i64s::from_slice(simd, &vec![50i64; S::i64s::N]); let b_i64 = S::i64s::from_slice(simd, &vec![-50i64; S::i64s::N]); let mask_i64 = S::mask64s::from_slice(simd, &vec![-1i64; S::mask64s::N]);