diff --git a/Project.toml b/Project.toml index 4ff4bcf..770e866 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "VectorizedRNG" uuid = "33b4df10-0173-11e9-2a0c-851a7edac40e" authors = ["Chris Elrod "] -version = "0.2.9" +version = "0.2.10" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" diff --git a/src/VectorizedRNG.jl b/src/VectorizedRNG.jl index 65a4d68..9e88905 100644 --- a/src/VectorizedRNG.jl +++ b/src/VectorizedRNG.jl @@ -33,46 +33,27 @@ include("xoshiro.jl") const GLOBAL_vRNGs = Ref{Ptr{UInt64}}() -local_rng(i) = Xoshift{XREGISTERS}(i*4simd_integer_register_size()*XREGISTERS + GLOBAL_vRNGs[]) + +const RNG_MEM_SIZE = (5(simd_integer_register_size()*XREGISTERS + 2048*3)) +local_rng(i) = Xoshift{XREGISTERS}(i*(RNG_MEM_SIZE) + GLOBAL_vRNGs[]) local_rng() = local_rng(Base.Threads.threadid() - 1) # include("precompile.jl") # _precompile_() -# const RANDBUFFER32 = Float32[] -# const RANDNBUFFER32 = Float32[] -# const RANDBUFFER64 = Float64[] -# const RANDNBUFFER64 = Float64[] -# const RANDBUFFERCOUNTER = UInt8[] -# const RANDNBUFFER32COUNTER = UInt8[] -# const RANDBUFFER64COUNTER = UInt8[] -# const RANDNBUFFER64COUNTER = UInt8[] - - - function __init__() # ccall(:jl_generating_output, Cint, ()) == 1 && return - nthreads = Base.Threads.nthreads() - nstreams = XREGISTERS * nthreads * simd_integer_register_size() - GLOBAL_vRNGs[] = ptr = VectorizationBase.valloc(5nstreams + 256 * 3nthreads, UInt64) - initXoshift!(ptr, nstreams) - - for tid ∈ 0:nthreads-1 - rng = local_rng(tid) - setrandu64counter!(rng, 0x00) - # setrandn32counter!(rng, 0x00) - setrand64counter!(rng, 0x00) - setrandn64counter!(rng, 0x00) - end - # resize!(RANDBUFFER32, 256nthreads) - # resize!(RANDNBUFFER32, 256nthreads) - # resize!(RANDBUFFER64, 256nthreads) - # resize!(RANDNBUFFER64, 256nthreads) - - # resize!(RANDBUFFERCOUNTER, VectorizationBase.CACHELINE_SIZE*nthreads); fill!(RANDBUFFERCOUNTER, 0) - # resize!(RANDNBUFFER32COUNTER, VectorizationBase.CACHELINE_SIZE*nthreads) - # resize!(RANDBUFFER64COUNTER, VectorizationBase.CACHELINE_SIZE*nthreads) - # resize!(RANDNBUFFER64COUNTER, VectorizationBase.CACHELINE_SIZE*nthreads) + nthreads = Base.Threads.nthreads() + GLOBAL_vRNGs[] = ptr = VectorizationBase.valloc((RNG_MEM_SIZE ÷ 8)*nthreads, UInt64) + nstreams = XREGISTERS * nthreads * simd_integer_register_size() + initXoshift!(ptr, nstreams) + for tid ∈ 0:nthreads-1 + rng = local_rng(tid) + setrandu64counter!(rng, 0x00) + # setrandn32counter!(rng, 0x00) + setrand64counter!(rng, 0x00) + setrandn64counter!(rng, 0x00) + end end