fix `randn!(local_rng(), ::AbstractArray{Float32})`

JuliaSIMD · chriselrod · Sep 30, 2021 · 257437f
1 parent 217a15b
commit 257437f
Show file tree

Hide file tree

Showing 4 changed files with 76 additions and 73 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "VectorizedRNG"
 uuid = "33b4df10-0173-11e9-2a0c-851a7edac40e"
 authors = ["Chris Elrod <elrodc@gmail.com>"]
-version = "0.2.12"
+version = "0.2.13"
 
 [deps]
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"

diff --git a/src/api.jl b/src/api.jl
@@ -81,59 +81,59 @@ end
 
 
 @inline function randnormal(u1::AbstractSIMD{W,UInt64}, u2::AbstractSIMD{W,UInt64}, ::Type{T}) where {W,T<:Union{Float32,Float64}}
-    s, c = randsincos(u1, T)
-    r = sqrt(nlog01(u2,T))
-    s * r, c * r
+  s, c = randsincos(u1, T)
+  r = sqrt(nlog01(u2,T))
+  s * r, c * r
 end
 @inline function randnormal(u1::AbstractSIMD{1,UInt64}, u2::AbstractSIMD{1,UInt64}, ::Type{Float64})
-    s, c = randsincos(u1(1), Float64)
-    r = sqrt(nlog01(u2(1), Float64))
-    Vec{1,Float64}((Core.VecElement(s * r),)), Vec{1,Float64}((Core.VecElement(c * r),))
+  s, c = randsincos(u1(1), Float64)
+  r = sqrt(nlog01(u2(1), Float64))
+  Vec{1,Float64}((Core.VecElement(s * r),)), Vec{1,Float64}((Core.VecElement(c * r),))
 end
 
 @generated function random_normal(vu::VecUnroll{Nm1,W,UInt64,Vec{W,UInt64}}, ::Type{T}) where {Nm1,W,T}
-    # @assert isodd(Nm1)
-    N = Nm1 + 1
-    q = Expr(:block, Expr(:meta, :inline), :(u = data(vu)))
-    ib = Expr(:block)
-    n = 0
-    if n < Nm1
-        u1t = Expr(:tuple); u2t = Expr(:tuple)
-        while n < Nm1
-            push!(u1t.args, Expr(:ref, :u, n+1))
-            push!(u2t.args, Expr(:ref, :u, n+2))
-            # push!(ib.args, Expr(:(=), Expr(:tuple, Symbol(:n_,n), Symbol(:n_,n+1)), Expr(:call, :randnormal, Expr(:ref, :u, n+1), Expr(:ref, :u, n+2), T)))
-            n += 2
-        end
-        push!(ib.args, :((sr,cr) = randnormal(VecUnroll($u1t), VecUnroll($u2t), $T)))
-        push!(ib.args, :(srd = data(sr))); push!(ib.args, :(crd = data(cr)))
-    end
-    nout = Expr(:tuple)
-    for n ∈ 1:N>>1
-        push!(nout.args, Expr(:ref, :srd, n))
-        push!(nout.args, Expr(:ref, :crd, n))
+  # @assert isodd(Nm1)
+  N = Nm1 + 1
+  q = Expr(:block, Expr(:meta, :inline), :(u = data(vu)))
+  ib = Expr(:block)
+  n = 0
+  if n < Nm1
+    u1t = Expr(:tuple); u2t = Expr(:tuple)
+    while n < Nm1
+      push!(u1t.args, Expr(:ref, :u, n+1))
+      push!(u2t.args, Expr(:ref, :u, n+2))
+      # push!(ib.args, Expr(:(=), Expr(:tuple, Symbol(:n_,n), Symbol(:n_,n+1)), Expr(:call, :randnormal, Expr(:ref, :u, n+1), Expr(:ref, :u, n+2), T)))
+      n += 2
     end
-    if n < N # then there is odd remainder
-        # we split the vector in two, gen randnormal, and then recombine.
-        Wl = (W << 3) ÷ sizeof(T) 
-        Wh = Wl >>> 1
-        t1 = Expr(:tuple); t2 = Expr(:tuple); t3 = Expr(:tuple);
-        append!(t1.args, 0:Wh-1); append!(t2.args, Wh:Wl-1); append!(t3.args, 0:Wl-1)
-        lm = Expr(:call, Expr(:curly, :Val, t1))
-        um = Expr(:call, Expr(:curly, :Val, t2))
-        cm = Expr(:call, Expr(:curly, :Val, t3))
-        remq = quote
-            ulast = u[$N]
-            (sₗ, cᵤ) = randnormal(shufflevector(ulast, $lm), shufflevector(ulast, $um), $T)
-        end
-        push!(ib.args, remq)
-        push!(nout.args, :(shufflevector(sₗ, cᵤ, $cm)))
+    push!(ib.args, :((sr,cr) = randnormal(VecUnroll($u1t), VecUnroll($u2t), $T)))
+    push!(ib.args, :(srd = data(sr))); push!(ib.args, :(crd = data(cr)))
+  end
+  nout = Expr(:tuple)
+  for n ∈ 1:N>>1
+    push!(nout.args, Expr(:ref, :srd, n))
+    push!(nout.args, Expr(:ref, :crd, n))
+  end
+  if n < N # then there is odd remainder
+    # we split the vector in two, gen randnormal, and then recombine.
+    Wl = (W << 3) ÷ sizeof(T) 
+    Wh = Wl >>> 1
+    t1 = Expr(:tuple); t2 = Expr(:tuple); t3 = Expr(:tuple);
+    append!(t1.args, 0:Wh-1); append!(t2.args, Wh:Wl-1); append!(t3.args, 0:Wl-1)
+    lm = Expr(:call, Expr(:curly, :Val, t1))
+    um = Expr(:call, Expr(:curly, :Val, t2))
+    cm = Expr(:call, Expr(:curly, :Val, t3))
+    remq = quote
+      ulast = u[$N]
+      (sₗ, cᵤ) = randnormal(shufflevector(ulast, $lm), shufflevector(ulast, $um), $T)
     end
-    push!(ib.args, :(nout = $nout))
-    push!(q.args, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), ib))
-                                   # push!(q.args, Expr(:tuple, [Symbol(:n_,n) for n ∈ 0:N-1]...))
-    push!(q.args, :(VecUnroll(nout)))
-    q
+    push!(ib.args, remq)
+    push!(nout.args, :(shufflevector(sₗ, cᵤ, $cm)))
+  end
+  push!(ib.args, :(nout = $nout))
+  push!(q.args, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), ib))
+  # push!(q.args, Expr(:tuple, [Symbol(:n_,n) for n ∈ 0:N-1]...))
+  push!(q.args, :(VecUnroll(nout)))
+  q
 end
 
 @inline Random.randn(rng::AbstractVRNG, ::Type{VecUnroll{N,W,T}}) where {N,W,T} = randn(rng, VecUnroll{N,W,T,Vec{W,T}})

diff --git a/src/special_approximations.jl b/src/special_approximations.jl
@@ -174,34 +174,34 @@ end
 # end
 
 @inline function log2_3q(v, e)
-    T = eltype(v)
-    m1 = v * v 
-    fma1 = muladd(m1, T(0.22119417504560815), T(0.22007686931522777))
-    fma2 = muladd(fma1, m1, T(0.26237080574885147))
-    fma3 = muladd(fma2, m1, T(0.32059774779444955))
-    fma4 = muladd(fma3, m1, T(0.41219859454853247))
-    fma5 = muladd(fma4, m1, T(0.5770780162997059))
-    fma6 = muladd(fma5, m1, T(0.9617966939260809))
-    m2 = v * T(2.8853900817779268)
-    fma7 = VectorizationBase.vfmsub(v, T(2.8853900817779268), m2)
-    a1 = e + m2
-    s1 = e - a1
-    a2 = m2 + s1
-    a3 = fma7 + a2
-    m3 = v * m1
-    a4 = a1 + a3
-    muladd(fma6, m3, a4)
+  T = eltype(v)
+  m1 = v * v 
+  fma1 = muladd(m1, T(0.22119417504560815), T(0.22007686931522777))
+  fma2 = muladd(fma1, m1, T(0.26237080574885147))
+  fma3 = muladd(fma2, m1, T(0.32059774779444955))
+  fma4 = muladd(fma3, m1, T(0.41219859454853247))
+  fma5 = muladd(fma4, m1, T(0.5770780162997059))
+  fma6 = muladd(fma5, m1, T(0.9617966939260809))
+  m2 = v * T(2.8853900817779268)
+  fma7 = VectorizationBase.vfmsub(v, T(2.8853900817779268), m2)
+  a1 = e + m2
+  s1 = e - a1
+  a2 = m2 + s1
+  a3 = fma7 + a2
+  m3 = v * m1
+  a4 = a1 + a3
+  muladd(fma6, m3, a4)
 end
 @inline function nlog01(u, ::Type{T}) where {T}
-    lz = leading_zeros( u )
-    # f = mask(u, Float64) # shift by lz
-    # f = vmul(0.75, mask(shift_excess_zeros(u, lz), Float64)) # shift by lz
-    # f = vfdiv(vsub(f, 1.0), vadd(f, 1.0))
-    f = floatbitmask(shift_excess_zeros(u, lz), T) # shift by lz
-    f = ( f - T(1.3333333333333333) ) / ( f + T(1.3333333333333333) )
-    # l2h = log12_9(f)
-    l2 = log2_3q(f, T(-0.5849625007211561814537389439478165087598144076924810604557526545410982277943579) - lz)
-    T(-0.6931471805599453) * l2
+  lz = reinterpret(Base.uinttype(T), leading_zeros( u ))
+  # f = mask(u, Float64) # shift by lz
+  # f = vmul(0.75, mask(shift_excess_zeros(u, lz), Float64)) # shift by lz
+  # f = vfdiv(vsub(f, 1.0), vadd(f, 1.0))
+  f = floatbitmask(shift_excess_zeros(u, lz), T) # shift by lz
+  f = ( f - T(1.3333333333333333) ) / ( f + T(1.3333333333333333) )
+  # l2h = log12_9(f)
+  l2 = log2_3q(f, T(-0.5849625007211561814537389439478165087598144076924810604557526545410982277943579) - lz)
+  T(-0.6931471805599453) * l2
 end
 # TODO: Add support for Float32 
 

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -100,6 +100,9 @@ end
         A = zeros(13, 29);
         randn!(local_rng(), A);
         @test iszero(sum(iszero, A))
+        A32 = zeros(Float32, 13, 29);
+        randn!(local_rng(), A32);
+        @test iszero(sum(iszero, A32))
         # TODO: Support this again
         # rngnorm = RNGTest.wrap(RandNormal01(local_rng()), Float32);
         # res = RNGTest.smallcrushJulia(rngnorm)