Skip to content

Commit c2f21d2

Browse files
thomcc authored and Amanieu committed
Avoid using simd_f(min|max) in _mm256_(min|max)_p[sd]
1 parent 6c4f4e1 commit c2f21d2

File tree

1 file changed

+80
-4
lines changed
  • crates/core_arch/src/x86

1 file changed

+80
-4
lines changed

crates/core_arch/src/x86/avx.rs

Lines changed: 80 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
196196
#[cfg_attr(test, assert_instr(vmaxpd))]
197197
#[stable(feature = "simd_x86", since = "1.27.0")]
198198
pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
199-
simd_fmax(a, b)
199+
vmaxpd(a, b)
200200
}
201201

202202
/// Compares packed single-precision (32-bit) floating-point elements in `a`
@@ -208,7 +208,7 @@ pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
208208
#[cfg_attr(test, assert_instr(vmaxps))]
209209
#[stable(feature = "simd_x86", since = "1.27.0")]
210210
pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
211-
simd_fmax(a, b)
211+
vmaxps(a, b)
212212
}
213213

214214
/// Compares packed double-precision (64-bit) floating-point elements
@@ -220,7 +220,7 @@ pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
220220
#[cfg_attr(test, assert_instr(vminpd))]
221221
#[stable(feature = "simd_x86", since = "1.27.0")]
222222
pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
223-
simd_fmin(a, b)
223+
vminpd(a, b)
224224
}
225225

226226
/// Compares packed single-precision (32-bit) floating-point elements in `a`
@@ -232,7 +232,7 @@ pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
232232
#[cfg_attr(test, assert_instr(vminps))]
233233
#[stable(feature = "simd_x86", since = "1.27.0")]
234234
pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
235-
simd_fmin(a, b)
235+
vminps(a, b)
236236
}
237237

238238
/// Multiplies packed double-precision (64-bit) floating-point elements
@@ -3034,6 +3034,14 @@ extern "C" {
30343034
fn movmskpd256(a: __m256d) -> i32;
30353035
#[link_name = "llvm.x86.avx.movmsk.ps.256"]
30363036
fn movmskps256(a: __m256) -> i32;
3037+
#[link_name = "llvm.x86.avx.min.ps.256"]
3038+
fn vminps(a: __m256, b: __m256) -> __m256;
3039+
#[link_name = "llvm.x86.avx.max.ps.256"]
3040+
fn vmaxps(a: __m256, b: __m256) -> __m256;
3041+
#[link_name = "llvm.x86.avx.min.pd.256"]
3042+
fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3043+
#[link_name = "llvm.x86.avx.max.pd.256"]
3044+
fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
30373045
}
30383046

30393047
#[cfg(test)]
@@ -3138,6 +3146,23 @@ mod tests {
31383146
let r = _mm256_max_pd(a, b);
31393147
let e = _mm256_setr_pd(2., 4., 6., 8.);
31403148
assert_eq_m256d(r, e);
3149+
// > If the values being compared are both 0.0s (of either sign), the
3150+
// > value in the second operand (source operand) is returned.
3151+
let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3152+
let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3153+
let wu: [u64; 4] = transmute(w);
3154+
let xu: [u64; 4] = transmute(x);
3155+
assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3156+
assert_eq!(xu, [0u64; 4]);
3157+
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3158+
// > second operand (source operand), either a NaN or a valid
3159+
// > floating-point value, is written to the result.
3160+
let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3161+
let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3162+
let yf: [f64; 4] = transmute(y);
3163+
let zf: [f64; 4] = transmute(z);
3164+
assert_eq!(yf, [0.0; 4]);
3165+
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
31413166
}
31423167

31433168
#[simd_test(enable = "avx")]
@@ -3147,6 +3172,23 @@ mod tests {
31473172
let r = _mm256_max_ps(a, b);
31483173
let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
31493174
assert_eq_m256(r, e);
3175+
// > If the values being compared are both 0.0s (of either sign), the
3176+
// > value in the second operand (source operand) is returned.
3177+
let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3178+
let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3179+
let wu: [u32; 8] = transmute(w);
3180+
let xu: [u32; 8] = transmute(x);
3181+
assert_eq!(wu, [0x8000_0000u32; 8]);
3182+
assert_eq!(xu, [0u32; 8]);
3183+
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3184+
// > second operand (source operand), either a NaN or a valid
3185+
// > floating-point value, is written to the result.
3186+
let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3187+
let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3188+
let yf: [f32; 8] = transmute(y);
3189+
let zf: [f32; 8] = transmute(z);
3190+
assert_eq!(yf, [0.0; 8]);
3191+
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
31503192
}
31513193

31523194
#[simd_test(enable = "avx")]
@@ -3156,6 +3198,23 @@ mod tests {
31563198
let r = _mm256_min_pd(a, b);
31573199
let e = _mm256_setr_pd(1., 3., 5., 7.);
31583200
assert_eq_m256d(r, e);
3201+
// > If the values being compared are both 0.0s (of either sign), the
3202+
// > value in the second operand (source operand) is returned.
3203+
let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3204+
let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3205+
let wu: [u64; 4] = transmute(w);
3206+
let xu: [u64; 4] = transmute(x);
3207+
assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3208+
assert_eq!(xu, [0u64; 4]);
3209+
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3210+
// > second operand (source operand), either a NaN or a valid
3211+
// > floating-point value, is written to the result.
3212+
let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3213+
let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3214+
let yf: [f64; 4] = transmute(y);
3215+
let zf: [f64; 4] = transmute(z);
3216+
assert_eq!(yf, [0.0; 4]);
3217+
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
31593218
}
31603219

31613220
#[simd_test(enable = "avx")]
@@ -3165,6 +3224,23 @@ mod tests {
31653224
let r = _mm256_min_ps(a, b);
31663225
let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
31673226
assert_eq_m256(r, e);
3227+
// > If the values being compared are both 0.0s (of either sign), the
3228+
// > value in the second operand (source operand) is returned.
3229+
let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3230+
let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3231+
let wu: [u32; 8] = transmute(w);
3232+
let xu: [u32; 8] = transmute(x);
3233+
assert_eq!(wu, [0x8000_0000u32; 8]);
3234+
assert_eq!(xu, [0u32; 8]);
3235+
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3236+
// > second operand (source operand), either a NaN or a valid
3237+
// > floating-point value, is written to the result.
3238+
let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3239+
let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3240+
let yf: [f32; 8] = transmute(y);
3241+
let zf: [f32; 8] = transmute(z);
3242+
assert_eq!(yf, [0.0; 8]);
3243+
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
31683244
}
31693245

31703246
#[simd_test(enable = "avx")]

0 commit comments

Comments
 (0)