From c119821e9299ba38c74ce4dc5a62b19363f0e81d Mon Sep 17 00:00:00 2001
From: Nathan Daly
Date: Mon, 3 Dec 2018 13:40:03 -0500
Subject: [PATCH] Inline fldmod to allow optimizing division by a compile-time constant

This allows LLVM to generate cheaper operations for dividing by a constant
power-of-ten. On my machine, this drops the time for multiplying two
FixedDecimal{Int32,2} numbers from 10.30ns to 2.92ns, or around a 70%
improvement.
---
 src/FixedPointDecimals.jl | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/FixedPointDecimals.jl b/src/FixedPointDecimals.jl
index 866eb21..8aec0fb 100644
--- a/src/FixedPointDecimals.jl
+++ b/src/FixedPointDecimals.jl
@@ -154,12 +154,18 @@ function _round_to_even(quotient::T, remainder::T, divisor::T) where {T <: Integ
 end
 _round_to_even(q, r, d) = _round_to_even(promote(q, r, d)...)
 
+# In many of our calls to fldmod, `y` is a constant (the coefficient, 10^f). However, since
+# `fldmod` is sometimes not being inlined, that constant information is not available to the
+# optimizer. We need an inlined version of fldmod so that the compiler can replace expensive
+# divide-by-power-of-ten instructions with the cheaper multiply-by-inverse-coefficient.
+@inline fldmodinline(x,y) = (fld(x,y), mod(x,y))
+
 # multiplication rounds to nearest even representation
 # TODO: can we use floating point to speed this up? after we build a
 # correctness test suite.
 function *(x::FD{T, f}, y::FD{T, f}) where {T, f}
     powt = coefficient(FD{T, f})
-    quotient, remainder = fldmod(widemul(x.i, y.i), powt)
+    quotient, remainder = fldmodinline(widemul(x.i, y.i), powt)
     reinterpret(FD{T, f}, _round_to_even(quotient, remainder, powt))
 end
 
@@ -195,12 +201,12 @@ floor(x::FD{T, f}) where {T, f} = FD{T, f}(fld(x.i, coefficient(FD{T, f})))
 # TODO: round with number of digits; should be easy
 function round(x::FD{T, f}, ::RoundingMode{:Nearest}=RoundNearest) where {T, f}
     powt = coefficient(FD{T, f})
-    quotient, remainder = fldmod(x.i, powt)
+    quotient, remainder = fldmodinline(x.i, powt)
     FD{T, f}(_round_to_even(quotient, remainder, powt))
 end
 function ceil(x::FD{T, f}) where {T, f}
     powt = coefficient(FD{T, f})
-    quotient, remainder = fldmod(x.i, powt)
+    quotient, remainder = fldmodinline(x.i, powt)
     if remainder > 0
         FD{T, f}(quotient + one(quotient))
     else