Skip to content

Commit 3034207

Browse files
authored
libm: Implement exp and its variants for i586 with inline assembly
Resolve the severe imprecision (~2%) that is due to inconsistent rounding. Closes: #1021
1 parent 8ab6f08 commit 3034207

File tree

9 files changed

+115
-19
lines changed

9 files changed

+115
-19
lines changed

libm-test/src/precision.rs

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,19 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
8383
Bn::Tgamma => 20,
8484
};
8585

86+
// These have a separate implementation on i586
87+
if cfg!(x86_no_sse) {
88+
match ctx.fn_ident {
89+
Id::Exp => ulp = 1,
90+
Id::Exp2 => ulp = 1,
91+
Id::Exp10 => ulp = 1,
92+
Id::Expf => ulp = 0,
93+
Id::Exp2f => ulp = 0,
94+
Id::Exp10f => ulp = 0,
95+
_ => (),
96+
}
97+
}
98+
8699
// There are some cases where musl's approximation is less accurate than ours. For these
87100
// cases, increase the ULP.
88101
if ctx.basis == Musl {
@@ -98,6 +111,8 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
98111
Id::Cbrt => ulp = 2,
99112
// FIXME(#401): musl has an incorrect result here.
100113
Id::Fdim => ulp = 2,
114+
Id::Exp2f => ulp = 1,
115+
Id::Expf => ulp = 1,
101116
Id::Sincosf => ulp = 500,
102117
Id::Tgamma => ulp = 20,
103118
_ => (),
@@ -124,8 +139,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
124139
Id::Asinh => ulp = 3,
125140
Id::Asinhf => ulp = 3,
126141
Id::Cbrt => ulp = 1,
127-
Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
128-
Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
129142
Id::Log1p | Id::Log1pf => ulp = 2,
130143
Id::Tan => ulp = 2,
131144
_ => (),
@@ -218,15 +231,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
218231
return XFAIL("expm1 representable numbers");
219232
}
220233

221-
if cfg!(x86_no_sse)
222-
&& ctx.base_name == BaseName::Exp2
223-
&& !expected.is_infinite()
224-
&& actual.is_infinite()
225-
{
226-
// We return infinity when there is a representable value. Test input: 127.97238
227-
return XFAIL("586 exp2 representable numbers");
228-
}
229-
230234
if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() {
231235
// we return some NaN that should be real values or infinite
232236
if ctx.basis == CheckBasis::Musl {
@@ -278,14 +282,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
278282
return XFAIL("i586 rint rounding mode");
279283
}
280284

281-
if cfg!(x86_no_sse)
282-
&& (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
283-
{
284-
// FIXME: i586 has very imprecise results with ULP > u32::MAX for these
285-
// operations so we can't reasonably provide a limit.
286-
return XFAIL_NOCHECK;
287-
}
288-
289285
if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
290286
// Errors get huge close to -inf
291287
return XFAIL_NOCHECK;

libm/src/math/arch/i586.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,62 @@ pub fn floor(mut x: f64) -> f64 {
6060
}
6161
x
6262
}
63+
/// Implements the exponential functions with `x87` assembly.
64+
///
65+
/// This relies on the instruction `f2xm1`, which computes `2^x - 1` (for
66+
/// |x| < 1). This transcendental instruction is documented to produce results
67+
/// with error below 1ulp (in the native double-extended precision format). This
68+
/// translates to correctly rounded results for f32, but results in f64 may have
69+
/// 1ulp error, which may depend on the hardware.
70+
macro_rules! x87exp {
71+
($float_ty:ident, $word_size:literal, $fn_name:ident, $load_op:literal) => {
72+
pub fn $fn_name(mut x: $float_ty) -> $float_ty { unsafe {
73+
core::arch::asm!(
74+
// Prepare the register stack as
75+
// ```
76+
// st(0) = y = x*log2(base)
77+
// st(1) = 1.0
78+
// st(2) = round(y)
79+
// ```
80+
concat!($load_op, " ", $word_size, " ptr [{x}]"),
81+
"fld1",
82+
"fld st(1)",
83+
"frndint",
84+
"fxch st(2)",
85+
86+
// Compare y with round(y) to determine if y is finite and
87+
// not an integer. If so, compute `exp2(y - round(y))` into
88+
// st(1). Otherwise skip ahead with `st(1) = 1.0`
89+
"fucom st(2)",
90+
"fstsw ax",
91+
"test ax, 0x4000",
92+
"jnz 2f",
93+
"fsub st(0), st(2)", // st(0) = y - round(y)
94+
"f2xm1", // st(0) = 2^st(0) - 1.0
95+
"fadd st(1), st(0)", // st(1) = 1 + st(0) = exp2(y - round(y))
96+
"2:",
97+
98+
// Finally, scale by `exp2(round(y))` and clear the stack.
99+
"fstp st(0)",
100+
"fscale",
101+
concat!("fstp ", $word_size, " ptr [{x}]"),
102+
"fstp st(0)",
103+
x = in(reg) &mut x,
104+
out("ax") _,
105+
out("st(0)") _, out("st(1)") _,
106+
out("st(2)") _, out("st(3)") _,
107+
out("st(4)") _, out("st(5)") _,
108+
out("st(6)") _, out("st(7)") _,
109+
options(nostack),
110+
);
111+
x
112+
}}
113+
};
114+
}
115+
116+
x87exp!(f32, "dword", x87_exp2f, "fld");
117+
x87exp!(f64, "qword", x87_exp2, "fld");
118+
x87exp!(f32, "dword", x87_exp10f, "fldl2t\nfmul");
119+
x87exp!(f64, "qword", x87_exp10, "fldl2t\nfmul");
120+
x87exp!(f32, "dword", x87_expf, "fldl2e\nfmul");
121+
x87exp!(f64, "qword", x87_exp, "fldl2e\nfmul");

libm/src/math/arch/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,8 @@ cfg_if! {
4848
pub use i586::{ceil, floor};
4949
}
5050
}
51+
cfg_if! {
52+
if #[cfg(x86_no_sse)] {
53+
pub use i586::{x87_exp10f, x87_exp10, x87_expf, x87_exp, x87_exp2f, x87_exp2};
54+
}
55+
}

libm/src/math/exp.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
8383
/// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
8484
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
8585
pub fn exp(mut x: f64) -> f64 {
86+
select_implementation! {
87+
name: x87_exp,
88+
use_arch_required: x86_no_sse,
89+
args: x,
90+
}
91+
8692
let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
8793
let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149
8894

libm/src/math/exp10.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ const P10: &[f64] = &[
99
/// Calculates 10 raised to the power of `x` (f64).
1010
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
1111
pub fn exp10(x: f64) -> f64 {
12+
select_implementation! {
13+
name: x87_exp10,
14+
use_arch_required: x86_no_sse,
15+
args: x,
16+
}
17+
1218
let (mut y, n) = modf(x);
1319
let u: u64 = n.to_bits();
1420
/* fabs(n) < 16 without raising invalid on nan */

libm/src/math/exp10f.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ const P10: &[f32] = &[
99
/// Calculates 10 raised to the power of `x` (f32).
1010
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
1111
pub fn exp10f(x: f32) -> f32 {
12+
select_implementation! {
13+
name: x87_exp10f,
14+
use_arch_required: x86_no_sse,
15+
args: x,
16+
}
17+
1218
let (mut y, n) = modff(x);
1319
let u = n.to_bits();
1420
/* fabsf(n) < 8 without raising invalid on nan */

libm/src/math/exp2.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,12 @@ static TBL: [u64; TBLSIZE * 2] = [
324324
/// Calculate `2^x`, that is, 2 raised to the power `x`.
325325
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
326326
pub fn exp2(mut x: f64) -> f64 {
327+
select_implementation! {
328+
name: x87_exp2,
329+
use_arch_required: x86_no_sse,
330+
args: x,
331+
}
332+
327333
let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64;
328334
let p1 = f64::from_bits(0x3fe62e42fefa39ef);
329335
let p2 = f64::from_bits(0x3fcebfbdff82c575);

libm/src/math/exp2f.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ static EXP2FT: [u64; TBLSIZE] = [
7575
/// Calculate `2^x`, that is, 2 raised to the power `x`.
7676
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
7777
pub fn exp2f(mut x: f32) -> f32 {
78+
select_implementation! {
79+
name: x87_exp2f,
80+
use_arch_required: x86_no_sse,
81+
args: x,
82+
}
83+
7884
let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
7985
let p1 = f32::from_bits(0x3f317218);
8086
let p2 = f32::from_bits(0x3e75fdf0);

libm/src/math/expf.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */
3232
/// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
3333
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
3434
pub fn expf(mut x: f32) -> f32 {
35+
select_implementation! {
36+
name: x87_expf,
37+
use_arch_required: x86_no_sse,
38+
args: x,
39+
}
40+
3541
let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
3642
let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */
3743
let mut hx = x.to_bits();

0 commit comments

Comments
 (0)