diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index 7831ea7435..bc91339138 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -3139,6 +3139,32 @@ mod tests {
         let r = _mm_madd_epi16(a, b);
         let e = _mm_setr_epi32(29, 81, 149, 233);
         assert_eq_m128i(r, e);
+
+        // Test large values (lanes built from i16::MIN and i16::MAX).
+        // MIN*MIN+MIN*MIN is 2^31, so it will overflow into i32::MIN (lane 1 of `e`).
+        let a = _mm_setr_epi16(
+            i16::MAX,
+            i16::MAX,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+            i16::MAX,
+            0,
+            0,
+        );
+        let b = _mm_setr_epi16(
+            i16::MAX,
+            i16::MAX,
+            i16::MIN,
+            i16::MIN,
+            i16::MAX,
+            i16::MIN,
+            0,
+            0,
+        );
+        let r = _mm_madd_epi16(a, b);
+        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
+        assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs
index 6d33238b08..af51a53feb 100644
--- a/crates/core_arch/src/x86/sse41.rs
+++ b/crates/core_arch/src/x86/sse41.rs
@@ -1294,6 +1294,13 @@ mod tests {
         let r = _mm_insert_ps::<0b11_00_1100>(a, b);
         let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
         assert_eq_m128(r, e);
+
+        // Zeroing takes precedence over the copied value: lane 0 is written and zero-masked
+        let a = _mm_set1_ps(1.0);
+        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let r = _mm_insert_ps::<0b11_00_0001>(a, b);
+        let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
+        assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse4.1")]
@@ -1682,30 +1689,58 @@ mod tests {
         assert_eq_m128(r, e);
     }
 
-    #[allow(deprecated)] // FIXME: This test uses deprecated CSR access functions
     #[simd_test(enable = "sse4.1")]
     unsafe fn test_mm_round_sd() {
         let a = _mm_setr_pd(1.5, 3.5);
         let b = _mm_setr_pd(-2.5, -4.5);
-        let old_mode = _MM_GET_ROUNDING_MODE();
-        _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
-        let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
-        _MM_SET_ROUNDING_MODE(old_mode);
+        let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
+        let e = _mm_setr_pd(-2.0, 3.5);
+        assert_eq_m128d(r, e);
+
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
+        let e = _mm_setr_pd(-3.0, 3.5);
+        assert_eq_m128d(r, e);
+
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
+        let e = _mm_setr_pd(-2.0, 3.5);
+        assert_eq_m128d(r, e);
+
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
         let e = _mm_setr_pd(-2.0, 3.5);
         assert_eq_m128d(r, e);
     }
 
-    #[allow(deprecated)] // FIXME: This test uses deprecated CSR access functions
     #[simd_test(enable = "sse4.1")]
     unsafe fn test_mm_round_ss() {
         let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
         let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
-        let old_mode = _MM_GET_ROUNDING_MODE();
-        _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
-        let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
-        _MM_SET_ROUNDING_MODE(old_mode);
+        let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
         let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
         assert_eq_m128(r, e);
+
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
+        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
+        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
+        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse4.1")]
@@ -1724,6 +1759,15 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
+    #[simd_test(enable = "sse4.1")]
+    unsafe fn test_mm_minpos_epu16_3() {
+        // Case where the minimum value is repeated (13 at indices 5 and 7; index 5 is expected)
+        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
+        let r = _mm_minpos_epu16(a);
+        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "sse4.1")]
     unsafe fn test_mm_mul_epi32() {
         {
diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs
index bdc6836ac8..4957c2b1ea 100644
--- a/crates/core_arch/src/x86/ssse3.rs
+++ b/crates/core_arch/src/x86/ssse3.rs
@@ -372,6 +372,11 @@ mod tests {
         let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
         let r = _mm_shuffle_epi8(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test indices greater than 15 wrapping around (adding 32 keeps each index's low 4 bits)
+        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
+        let r = _mm_shuffle_epi8(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -421,6 +426,22 @@ mod tests {
         let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
         let r = _mm_hadd_epi16(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test wrapping on overflow
+        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
+        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
+        let expected = _mm_setr_epi16(
+            i16::MIN,
+            i16::MIN + 1,
+            i16::MIN + 2,
+            i16::MIN + 3,
+            i16::MAX,
+            i16::MAX - 1,
+            i16::MAX - 2,
+            i16::MAX - 3,
+        );
+        let r = _mm_hadd_epi16(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -430,6 +451,22 @@ mod tests {
         let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
         let r = _mm_hadds_epi16(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test saturating on overflow
+        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
+        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
+        let expected = _mm_setr_epi16(
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+        );
+        let r = _mm_hadds_epi16(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -439,6 +476,13 @@ mod tests {
         let expected = _mm_setr_epi32(3, 7, 132, 7);
         let r = _mm_hadd_epi32(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test wrapping on overflow
+        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
+        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
+        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
+        let r = _mm_hadd_epi32(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -448,6 +492,22 @@ mod tests {
         let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
         let r = _mm_hsub_epi16(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test wrapping on overflow
+        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
+        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
+        let expected = _mm_setr_epi16(
+            i16::MIN,
+            i16::MIN + 1,
+            i16::MIN + 2,
+            i16::MIN + 3,
+            i16::MAX,
+            i16::MAX - 1,
+            i16::MAX - 2,
+            i16::MAX - 3,
+        );
+        let r = _mm_hsub_epi16(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -457,6 +517,22 @@ mod tests {
         let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
         let r = _mm_hsubs_epi16(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test saturating on overflow
+        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
+        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
+        let expected = _mm_setr_epi16(
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+        );
+        let r = _mm_hsubs_epi16(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -466,6 +542,13 @@ mod tests {
         let expected = _mm_setr_epi32(-1, -1, -124, 1);
         let r = _mm_hsub_epi32(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test wrapping on overflow
+        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
+        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
+        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
+        let r = _mm_hsub_epi32(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -485,6 +568,27 @@ mod tests {
         let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
         let r = _mm_maddubs_epi16(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test widening and saturation (`a` lanes are unsigned bytes, `b` lanes are signed bytes)
+        #[rustfmt::skip]
+        let a = _mm_setr_epi8(
+            u8::MAX as i8, u8::MAX as i8,
+            u8::MAX as i8, u8::MAX as i8,
+            u8::MAX as i8, u8::MAX as i8,
+            100, 100, 0, 0,
+            0, 0, 0, 0, 0, 0,
+        );
+        #[rustfmt::skip]
+        let b = _mm_setr_epi8(
+            i8::MAX, i8::MAX,
+            i8::MAX, i8::MIN,
+            i8::MIN, i8::MIN,
+            50, 15, 0, 0, 0,
+            0, 0, 0, 0, 0,
+        );
+        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
+        let r = _mm_maddubs_epi16(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]
@@ -494,6 +598,13 @@ mod tests {
         let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
         let r = _mm_mulhrs_epi16(a, b);
         assert_eq_m128i(r, expected);
+
+        // Test extreme values (MIN * MIN scales to +2^15, which is stored as i16::MIN)
+        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
+        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
+        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
+        let r = _mm_mulhrs_epi16(a, b);
+        assert_eq_m128i(r, expected);
     }
 
     #[simd_test(enable = "ssse3")]