@@ -60,3 +60,62 @@ pub fn floor(mut x: f64) -> f64 {
6060 }
6161 x
6262}
/// Implements the exponential functions with `x87` assembly.
///
/// Each expansion defines `pub fn $fn_name(x: $float_ty) -> $float_ty`.
///
/// Macro parameters:
/// * `$float_ty` - the floating-point type of the argument and the result.
/// * `$word_size` - the assembler width keyword (e.g. `"dword"`, `"qword"`)
///   used for the memory operand that reads and writes `x`; it has to match
///   the size of `$float_ty`.
/// * `$fn_name` - the name of the generated function.
/// * `$load_op` - instruction text that must leave `y = x * log2(base)` in
///   `st(0)`. The macro appends `" <word_size> ptr [<address of x>]"` to it,
///   so its last instruction must accept a memory operand.
///
/// This relies on the instruction `f2xm1`, which computes `2^x - 1` (for
/// |x| < 1). This transcendental instruction is documented to produce results
/// with error below 1ulp (in the native double-extended precision format). This
/// translates to correctly rounded results for f32, but results in f64 may have
/// 1ulp error, which may depend on the hardware.
macro_rules! x87exp {
    ($float_ty:ident, $word_size:literal, $fn_name:ident, $load_op:literal) => {
        pub fn $fn_name(mut x: $float_ty) -> $float_ty {
            // SAFETY: the only memory accessed is the local `x`, through the
            // pointer passed as the `{x}` operand, and it is read and written
            // with the width named by `$word_size`. `ax` and every x87
            // register are declared as clobbered, and the three values pushed
            // onto the x87 stack are all popped again before the block ends,
            // so the register stack is empty on exit.
            unsafe {
                core::arch::asm!(
                    // Prepare the register stack as
                    // ```
                    // st(0) = y = x*log2(base)
                    // st(1) = 1.0
                    // st(2) = round(y)
                    // ```
                    concat!($load_op, " ", $word_size, " ptr [{x}]"),
                    "fld1",
                    "fld st(1)",
                    "frndint",
                    "fxch st(2)",

                    // Compare y with round(y) to determine if y is finite and
                    // not an integer. If so, compute `exp2(y - round(y))` into
                    // st(1). Otherwise skip ahead with `st(1) = 1.0`.
                    //
                    // 0x4000 selects condition flag C3 of the status word,
                    // which `fucom` sets when the operands are equal or
                    // unordered (NaN). An infinite y compares equal to its
                    // rounded value, so it takes the skip path as well.
                    "fucom st(2)",
                    "fstsw ax",
                    "test ax, 0x4000",
                    "jnz 2f",
                    "fsub st(0), st(2)", // st(0) = y - round(y)
                    "f2xm1",             // st(0) = 2^st(0) - 1.0
                    "fadd st(1), st(0)", // st(1) = 1 + st(0) = exp2(y - round(y))
                    "2:",

                    // Finally, scale by `exp2(round(y))` and clear the stack.
                    "fstp st(0)", // drop the scratch value; st(0) = factor, st(1) = round(y)
                    "fscale",
                    concat!("fstp ", $word_size, " ptr [{x}]"), // store the result into `x`
                    "fstp st(0)", // pop round(y), leaving the stack empty
                    x = in(reg) &mut x,
                    out("ax") _,
                    out("st(0)") _, out("st(1)") _,
                    out("st(2)") _, out("st(3)") _,
                    out("st(4)") _, out("st(5)") _,
                    out("st(6)") _, out("st(7)") _,
                    options(nostack),
                );
            }
            x
        }
    };
}
115+
116+ x87exp ! ( f32 , "dword" , x87_exp2f, "fld" ) ;
117+ x87exp ! ( f64 , "qword" , x87_exp2, "fld" ) ;
118+ x87exp ! ( f32 , "dword" , x87_exp10f, "fldl2t\n fmul" ) ;
119+ x87exp ! ( f64 , "qword" , x87_exp10, "fldl2t\n fmul" ) ;
120+ x87exp ! ( f32 , "dword" , x87_expf, "fldl2e\n fmul" ) ;
121+ x87exp ! ( f64 , "qword" , x87_exp, "fldl2e\n fmul" ) ;
0 commit comments