Skip to content

Commit

Permalink
Fix UB in _mm_movemask_ps, _mm_movemask_pd, _mm256_movemask_ps and _m…
Browse files Browse the repository at this point in the history
…m256_movemask_pd

The `simd_bitmask` requires each element to be all-1 or all-0, while the x86 functions only check for the highest bit.
  • Loading branch information
eduardosm committed Oct 11, 2023
1 parent 79a4109 commit 590618d
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 4 deletions.
10 changes: 8 additions & 2 deletions crates/core_arch/src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2066,7 +2066,10 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
#[cfg_attr(test, assert_instr(vmovmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
simd_bitmask::<u64x4, u8>(transmute(a)).into()
// Propagate the highest bit to the rest, because simd_bitmask
// requires all-1 or all-0.
let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
simd_bitmask::<i64x4, u8>(mask).into()
}

/// Sets each bit of the returned mask based on the most significant bit of the
Expand All @@ -2079,7 +2082,10 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
#[cfg_attr(test, assert_instr(vmovmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
simd_bitmask::<u32x8, u8>(transmute(a)).into()
// Propagate the highest bit to the rest, because simd_bitmask
// requires all-1 or all-0.
let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
simd_bitmask::<i32x8, u8>(transmute(mask)).into()
}

/// Returns vector of type __m256d with all elements set to zero.
Expand Down
5 changes: 4 additions & 1 deletion crates/core_arch/src/x86/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1081,7 +1081,10 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
#[cfg_attr(test, assert_instr(movmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
simd_bitmask::<u32x4, u8>(transmute(a)).into()
// Propagate the highest bit to the rest, because simd_bitmask
// requires all-1 or all-0.
let mask: i32x4 = simd_lt(transmute(a), i32x4::splat(0));
simd_bitmask::<i32x4, u8>(mask).into()
}

/// Construct a `__m128` with the lowest element read from `p` and the other
Expand Down
5 changes: 4 additions & 1 deletion crates/core_arch/src/x86/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2450,7 +2450,10 @@ pub unsafe fn _mm_setzero_pd() -> __m128d {
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
simd_bitmask::<u64x2, u8>(transmute(a)).into()
// Propagate the highest bit to the rest, because simd_bitmask
// requires all-1 or all-0.
let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0));
simd_bitmask::<i64x2, u8>(mask).into()
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
Expand Down

0 comments on commit 590618d

Please sign in to comment.