diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 074bf744d5..b7b5a44396 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -2066,7 +2066,10 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { #[cfg_attr(test, assert_instr(vmovmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0)); + simd_bitmask::(mask).into() } /// Sets each bit of the returned mask based on the most significant bit of the @@ -2079,7 +2082,10 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { #[cfg_attr(test, assert_instr(vmovmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0)); + simd_bitmask::(transmute(mask)).into() } /// Returns vector of type __m256d with all elements set to zero. diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 67d20512d7..6a2be09216 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1081,7 +1081,10 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(movmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i32x4 = simd_lt(transmute(a), i32x4::splat(0)); + simd_bitmask::(mask).into() } /// Construct a `__m128` with the lowest element read from `p` and the other diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index e784c407de..7831ea7435 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2450,7 +2450,10 @@ pub unsafe fn _mm_setzero_pd() -> __m128d { #[cfg_attr(test, assert_instr(movmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0)); + simd_bitmask::(mask).into() } /// Loads 128-bits (composed of 2 packed double-precision (64-bit)