From f6d375c0231747ff9b044b7494d43904c2f786ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?= Date: Fri, 21 Jun 2024 17:21:52 +0200 Subject: [PATCH] Use the formulas from Hacker's delight for both signed and unsigned Ints --- src/fldmod-by-const.jl | 142 +++++++++------------------------- test/fldmod-by-const_tests.jl | 32 +------- 2 files changed, 39 insertions(+), 135 deletions(-) diff --git a/src/fldmod-by-const.jl b/src/fldmod-by-const.jl index 1562a4a..6c62ce3 100644 --- a/src/fldmod-by-const.jl +++ b/src/fldmod-by-const.jl @@ -68,93 +68,44 @@ end return x - quotient * y end -# This function is based on the native code produced by the following: -# @code_native ((x)->div(x, 100))(Int64(2)) -function div_by_const(x::T, ::Val{C}) where {T, C} - # These checks will be compiled away during specialization. - # While for `*(FixedDecimal, FixedDecimal)`, C will always be a power of 10, these - # checks allow this function to work for any `C > 0`, in case that's useful in the - # future. - if C == 1 - return x - elseif ispow2(C) - return div(x, C) # Will already do the right thing - elseif C <= 0 - throw(DomainError("C must be > 0")) - end - # Calculate the magic number 2^N/C. Note that this is computed statically, not at - # runtime. - inverse_coeff, toshift = calculate_inverse_coeff(T, C) - # Compute the upper-half of widemul(x, 2^nbits(T)/C). - # By keeping only the upper half, we're essentially dividing by 2^nbits(T), undoing the - # numerator of the multiplication, so that the result is equal to x/C. - out = mul_hi(x, inverse_coeff) - # This condition will be compiled away during specialization. - if T <: Signed - # Because our magic number has a leading one (since we shift all-the-way left), the - # result is negative if it's Signed. We add x to give us the positive equivalent. - out += x - signshift = (nbits(x) - 1) - isnegative = T(out >>> signshift) # 1 if < 0 else 0 (Unsigned bitshift to read top bit) - end - # Undo the bitshifts used to calculate the invoeff magic number with maximum precision. - out = out >> toshift - if T <: Signed - out = out + isnegative +# Unsigned magic number computation + shift by constant +# See Hacker's delight, equations (26) and (27) from Chapter 10-9. +# nmax and divisor must be > 0. +Base.@assume_effects :foldable function magicg(nmax::Unsigned, divisor) + T = typeof(nmax) + W = _widen(T) + d = W(divisor) + + nc = div(W(nmax) + W(1), d) * d - W(1) + nbits = sizeof(nmax) * 8 - leading_zeros(nmax) # most significant bit + for p in 0:2nbits + if W(2)^p > nc * (d - W(1) - rem(W(2)^p - W(1), d)) # (27) + m = div(W(2)^p + d - W(1) - rem(W(2)^p - W(1), d), d) # (26) + return (m, p) + end end - return T(out) + return W(0), 0 # Should never reach here end - - - -# REQUIRES: C must not be 0, 1, -1 -# See this implementation in LLVM: -# https://llvm.org/doxygen/DivisionByConstantInfo_8cpp_source.html -# which was taken from "Hacker's Delight". -Base.@assume_effects :foldable function calculate_inverse_coeff(::Type{T}, C) where {T} - # First, calculate 2^nbits(T)/C - # We shift away leading zeros to preserve the most precision when we use it to multiply - # in the next step. At the end, we will shift the final answer back to undo this - # operation (which is why we need to return `toshift`). - # Note, also, that we calculate invcoeff at double-precision so that the left-shift - # doesn't leave trailing zeros. We truncate to only the upper-half before returning. - UT = _unsigned(T) - invcoeff = typemax(_widen(UT)) ÷ C - toshift = leading_zeros(invcoeff) - invcoeff = invcoeff << toshift - # Now, truncate to only the upper half of invcoeff, after we've shifted. Instead of - # bitshifting, we round to maintain precision. (This is needed to prevent off-by-ones.) - # -- This is equivalent to `invcoeff = T(invcoeff >> sizeof(T))`, except rounded. -- - two_to_N = _widen(typemax(UT)) + UT(1) # Precise value for 2^nbits(T) (doesn't fit in T) - invcoeff = _round_to_nearest(fldmod(invcoeff, two_to_N)..., two_to_N ) % T - return invcoeff, toshift -end - -function mul_hi(x::T, y::T) where T - xy = _widemul(x, y) # support Int256 -> Int512 (!!) - (xy >> nbits(T)) % T -end - - - -# Unsigned magic number computation + divide by constant -# Hacker's delight figure 10–4 -Base.@assume_effects :foldable function magicgu(nmax, d) +# See Hacker's delight, equations (5) and (6) from Chapter 10-4. +# nmax and divisor must be > 0. +Base.@assume_effects :foldable function magicg(nmax::Signed, divisor) T = typeof(nmax) - nc = (nmax ÷ d) * d - 1 - N = T(floor(log2(nmax) + 1)) - MathType = _widen(T) - two = MathType(2) - for p in 0:2N - if two^p > nc * (d - 1 - (2^p - 1) % d) - m = (two^p + d - 1 - (2^p - 1) % d) ÷ d - return (m%T, p) + W = _widen(T) + d = W(divisor) + + nc = ((W(nmax) + W(1)) ÷ d) * d - W(1) + nbits = sizeof(nmax) * 8 - leading_zeros(nmax) # most significant bit + for p in 0:2nbits + if W(2)^p > nc * (d - rem(W(2)^p, d)) # (6) + m = div(W(2)^p + d - rem(W(2)^p, d), d) # (5) + return (m, p) end end - error("Can't find p, something is wrong. nmax: $(repr(nmax)), d: $(repr(d)), N: $N") + return W(0), 0 # Should never reach here end -function div_by_const(x::T, ::Val{C}) where {T<:Unsigned, C} + +function div_by_const(x::T, ::Val{C}) where {T, C} # These checks will be compiled away during specialization. # While for `*(FixedDecimal, FixedDecimal)`, C will always be a power of 10, these # checks allow this function to work for any `C > 0`, in case that's useful in the @@ -162,31 +113,14 @@ function div_by_const(x::T, ::Val{C}) where {T<:Unsigned, C} if C == 1 return x elseif ispow2(C) - return div(x, C) # Will already do the right thing + return div(x, C) # Will already do the right thing elseif C <= 0 throw(DomainError("C must be > 0")) end - # Calculate the magic number 2^N/C. Note that this is computed statically, not at - # runtime. - inverse_coeff, toshift = magicgu(typemax(T), C) - out = _widemul(x, inverse_coeff) # support Int256 -> Int512 (!!) - out = out >> toshift - return out % T -end - - - + # Calculate the magic number and shift amount, based on Hacker's Delight, Chapter 10. + magic_number, shift = magicg(typemax(T), C) - - -# Annoyingly, Unsigned(T) isn't defined for BitIntegers types: -# https://github.com/rfourquet/BitIntegers.jl/pull/2 -# Note: We do not need this for Int512, since we only widen to 512 _after_ calling -# _unsigned, above. This code is only for supporting the built-in integer types, which only -# go up to 128-bits (widened twice to 512). If a user wants to extend FixedDecimals for -# other integer types, they will need to add methods to either _unsigned or unsigned. -_unsigned(x) = unsigned(x) -_unsigned(::Type{Int256}) = UInt256 -_unsigned(::Type{UInt256}) = UInt256 - -nbits(x) = sizeof(x) * 8 + out = _widemul(promote(x, magic_number)...) + out >>= shift + return (out % T) + (x < zero(T)) # TODO: Figure out why and if the + 1 if non-positive is necessary +end diff --git a/test/fldmod-by-const_tests.jl b/test/fldmod-by-const_tests.jl index 0adaa35..95dfce9 100644 --- a/test/fldmod-by-const_tests.jl +++ b/test/fldmod-by-const_tests.jl @@ -1,36 +1,6 @@ using Test using FixedPointDecimals -@testset "calculate_inverse_coeff signed" begin - using FixedPointDecimals: calculate_inverse_coeff - - # The correct magic number here comes from investigating the following native code - # produced on an m2 aarch64 macbook pro: - # @code_native ((x)->fld(x,100))(2) - # ... - # mov x8, #55051 - # movk x8, #28835, lsl #16 - # movk x8, #2621, lsl #32 - # movk x8, #41943, lsl #48 - # Where: - # julia> 55051 | 28835 << 16 | 2621 << 32 | 41943 << 48 - # -6640827866535438581 - @test calculate_inverse_coeff(Int64, 100) == (-6640827866535438581, 6) - - # Same for the tests below: - - # (LLVM's magic number is shifted one bit less, then they shift by 2, instead of 3, - # but the result is the same.) - @test calculate_inverse_coeff(Int64, 10) == (7378697629483820647 << 1, 3) -end - -@testset "calculate_inverse_coeff signed 4" begin - using FixedPointDecimals: calculate_inverse_coeff - - # Same here, our magic number is shifted 2 bits more than LLVM's - @test calculate_inverse_coeff(UInt64, 100) == (0xa3d70a3d70a3d70b, 6) -end - @testset "div_by_const" begin vals = [2432, 100, 0x1, Int32(10000), typemax(Int64), typemax(Int16), 8, Int64(2)^32] for a_base in vals @@ -53,7 +23,7 @@ end end end -@testitem "fixed decimal multiplication - exhaustive 8-bit" begin +@testset "fixed decimal multiplication - exhaustive 8-bit" begin @testset for P in (0,1) @testset for T in (Int8, UInt8) FD = FixedDecimal{T,P}