diff --git a/HISTORY.md b/HISTORY.md index aa7f9f0ccdad6..c3ca212453d07 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -136,6 +136,14 @@ Standard library changes `AnnotatedString` with various faces or other attributes applied ([#49586]). #### Package Manager +* It is now possible to specify "sources" for packages in a `[sources]` section in Project.toml. + This can be used to add non-registered normal or test dependencies. +* Pkg now obeys `[compat]` bounds for `julia` and raises an error if the version of the running Julia binary is incompatible with the bounds in `Project.toml`. + Pkg has always obeyed this compat when working with Registry packages. This change mostly affects local packages. +* `pkg> add` and `Pkg.add` will now add compat entries for new direct dependencies if the active environment is a + package (has a `name` and `uuid` entry). +* Dependencies can now be directly added as weak deps or extras via the `pkg> add --weak/extra Foo` or + `Pkg.add("Foo", target=:weakdeps/:extras)` forms. #### LinearAlgebra * `cbrt(::AbstractMatrix{<:Real})` is now defined and returns real-valued matrix cube roots of real-valued matrices ([#50661]).
diff --git a/base/Base.jl b/base/Base.jl index 1e780bb15141a..bfac74e5d7bab 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -25,6 +25,11 @@ function include(mod::Module, path::String) end include(path::String) = include(Base, path) +struct IncludeInto <: Function + m::Module +end +(this::IncludeInto)(fname::AbstractString) = include(this.m, fname) + # from now on, this is now a top-module for resolving syntax const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module) @@ -533,6 +538,7 @@ include("deepcopy.jl") include("download.jl") include("summarysize.jl") include("errorshow.jl") +include("util.jl") include("initdefs.jl") Filesystem.__postinit__() @@ -549,7 +555,6 @@ include("loading.jl") # misc useful functions & macros include("timing.jl") -include("util.jl") include("client.jl") include("asyncmap.jl") @@ -572,6 +577,9 @@ include("precompilation.jl") for m in methods(include) delete_method(m) end +for m in methods(IncludeInto(Base)) + delete_method(m) +end # This method is here only to be overwritten during the test suite to test # various sysimg related invalidation scenarios. @@ -579,8 +587,10 @@ a_method_to_overwrite_in_test() = inferencebarrier(1) # These functions are duplicated in client.jl/include(::String) for # nicer stacktraces. 
Modifications here have to be backported there -include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) -include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +@noinline include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) +@noinline include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +(this::IncludeInto)(fname::AbstractString) = include(identity, this.m, fname) +(this::IncludeInto)(mapexpr::Function, fname::AbstractString) = include(mapexpr, this.m, fname) # External libraries vendored into Base Core.println("JuliaSyntax/src/JuliaSyntax.jl") @@ -646,7 +656,6 @@ function __init__() init_load_path() init_active_project() append!(empty!(_sysimage_modules), keys(loaded_modules)) - empty!(explicit_loaded_modules) empty!(loaded_precompiles) # If we load a packageimage when building the image this might not be empty for (mod, key) in module_keys push!(get!(Vector{Module}, loaded_precompiles, key), mod) diff --git a/base/array.jl b/base/array.jl index a628c1212659d..40907b2b00317 100644 --- a/base/array.jl +++ b/base/array.jl @@ -355,6 +355,17 @@ copy return $(Expr(:new, :(typeof(a)), :(memoryref(newmem)), :(a.size))) end +# a mutating version of copyto! 
that results in dst aliasing src afterwards +function _take!(dst::Array{T,N}, src::Array{T,N}) where {T,N} + if getfield(dst, :ref) !== getfield(src, :ref) + setfield!(dst, :ref, getfield(src, :ref)) + end + if getfield(dst, :size) !== getfield(src, :size) + setfield!(dst, :size, getfield(src, :size)) + end + return dst +end + ## Constructors ## similar(a::Array{T,1}) where {T} = Vector{T}(undef, size(a,1)) diff --git a/base/boot.jl b/base/boot.jl index 861c83a2edac5..ed3e22391f215 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -454,9 +454,13 @@ Nothing() = nothing # This should always be inlined getptls() = ccall(:jl_get_ptls_states, Ptr{Cvoid}, ()) -include(m::Module, fname::String) = ccall(:jl_load_, Any, (Any, Any), m, fname) +include(m::Module, fname::String) = (@noinline; ccall(:jl_load_, Any, (Any, Any), m, fname)) +eval(m::Module, @nospecialize(e)) = (@noinline; ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e)) -eval(m::Module, @nospecialize(e)) = ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e) +struct EvalInto <: Function + m::Module +end +(this::EvalInto)(@nospecialize(e)) = eval(this.m, e) mutable struct Box contents::Any diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index e70633ffecf6a..2eacdf0f56cfe 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -339,43 +339,58 @@ RPO traversal and in particular, any use of an SSA value must come after (by linear order) its definition. 
""" function domsort_ssa!(ir::IRCode, domtree::DomTree) - # First compute the new order of basic blocks + # Mapping from new → old BB index + # An "old" index of 0 means that this was a BB inserted as part of a fixup (see below) result_order = Int[] - stack = Int[] + + # Mapping from old → new BB index bb_rename = fill(-1, length(ir.cfg.blocks)) - node = 1 - ncritbreaks = 0 - nnewfallthroughs = 0 - while node !== -1 - push!(result_order, node) - bb_rename[node] = length(result_order) - cs = domtree.nodes[node].children - terminator = ir[SSAValue(last(ir.cfg.blocks[node].stmts))][:stmt] - next_node = node + 1 - node = -1 + + # The number of GotoNodes we need to insert to preserve control-flow after sorting + nfixupstmts = 0 + + # node queued up for scheduling (-1 === nothing) + node_to_schedule = 1 + worklist = Int[] + while node_to_schedule !== -1 + # First assign a new BB index to `node_to_schedule` + push!(result_order, node_to_schedule) + bb_rename[node_to_schedule] = length(result_order) + cs = domtree.nodes[node_to_schedule].children + terminator = ir[SSAValue(last(ir.cfg.blocks[node_to_schedule].stmts))][:stmt] + fallthrough = node_to_schedule + 1 + node_to_schedule = -1 + # Adding the nodes in reverse sorted order attempts to retain # the original source order of the nodes as much as possible. 
# This is not required for correctness, but is easier on the humans - for child in Iterators.Reverse(cs) - if child == next_node + for node in Iterators.Reverse(cs) + if node == fallthrough # Schedule the fall through node first, # so we can retain the fall through - node = next_node + node_to_schedule = node else - push!(stack, child) + push!(worklist, node) end end - if node == -1 && !isempty(stack) - node = pop!(stack) + if node_to_schedule == -1 && !isempty(worklist) + node_to_schedule = pop!(worklist) end - if node != next_node && !isa(terminator, Union{GotoNode, ReturnNode}) + # If a fallthrough successor is no longer the fallthrough after sorting, we need to + # add a GotoNode (and either extend or split the basic block as necessary) + if node_to_schedule != fallthrough && !isa(terminator, Union{GotoNode, ReturnNode}) if isa(terminator, GotoIfNot) # Need to break the critical edge - ncritbreaks += 1 + push!(result_order, 0) + elseif isa(terminator, EnterNode) || isexpr(terminator, :leave) + # Cannot extend the BasicBlock with a goto, have to split it push!(result_order, 0) else - nnewfallthroughs += 1 + # No need for a new block, just extend + @assert !isterminator(terminator) end + # Reserve space for the fixup goto + nfixupstmts += 1 end end new_bbs = Vector{BasicBlock}(undef, length(result_order)) @@ -385,7 +400,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) nstmts += length(ir.cfg.blocks[i].stmts) end end - result = InstructionStream(nstmts + ncritbreaks + nnewfallthroughs) + result = InstructionStream(nstmts + nfixupstmts) inst_rename = Vector{SSAValue}(undef, length(ir.stmts) + length(ir.new_nodes)) @inbounds for i = 1:length(ir.stmts) inst_rename[i] = SSAValue(-1) @@ -394,7 +409,6 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) inst_rename[i + length(ir.stmts)] = SSAValue(i + length(result)) end bb_start_off = 0 - crit_edge_breaks_fixup = Tuple{Int, Int}[] for (new_bb, bb) in pairs(result_order) if bb == 0 nidx = bb_start_off + 1 @@ 
-426,8 +440,8 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) else result[inst_range[end]][:stmt] = GotoNode(bb_rename[terminator.label]) end - elseif isa(terminator, GotoIfNot) - # Check if we need to break the critical edge + elseif isa(terminator, GotoIfNot) || isa(terminator, EnterNode) || isexpr(terminator, :leave) + # Check if we need to break the critical edge or split the block if bb_rename[bb + 1] != new_bb + 1 @assert result_order[new_bb + 1] == 0 # Add an explicit goto node in the next basic block (we accounted for this above) @@ -435,11 +449,14 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) node = result[nidx] node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate end - result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) - elseif !isa(terminator, ReturnNode) - if isa(terminator, EnterNode) + if isa(terminator, GotoIfNot) + result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) + elseif isa(terminator, EnterNode) result[inst_range[end]][:stmt] = EnterNode(terminator, terminator.catch_dest == 0 ? 
0 : bb_rename[terminator.catch_dest]) + else + @assert isexpr(terminator, :leave) end + elseif !isa(terminator, ReturnNode) if bb_rename[bb + 1] != new_bb + 1 # Add an explicit goto node nidx = inst_range[end] + 1 @@ -452,7 +469,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) local new_preds, new_succs let bb = bb, bb_rename = bb_rename, result_order = result_order new_preds = Int[bb for bb in (rename_incoming_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].preds) if bb != -1] - new_succs = Int[ rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs] + new_succs = Int[ rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs] end new_bbs[new_bb] = BasicBlock(inst_range, new_preds, new_succs) end diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index a6b7e53c6f320..450cfdcfadf82 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -601,8 +601,16 @@ add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVec return TypeVar end end - tv = TypeVar(nval, lb, ub) - return PartialTypeVar(tv, lb_certain, ub_certain) + lb_valid = lb isa Type || lb isa TypeVar + ub_valid = ub isa Type || ub isa TypeVar + if lb_valid && ub_valid + tv = TypeVar(nval, lb, ub) + return PartialTypeVar(tv, lb_certain, ub_certain) + elseif !lb_valid && lb_certain + return Union{} + elseif !ub_valid && ub_certain + return Union{} + end end return TypeVar end diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 0d5d5ac00e8d0..b080bf51e5e98 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -2580,7 +2580,7 @@ cases. See also [`setproperty!`](@ref Base.setproperty!) 
and [`getglobal`](@ref) # Examples -```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*" +```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*\\n.*)*" julia> module M; global a; end; julia> M.a # same as `getglobal(M, :a)` diff --git a/base/errorshow.jl b/base/errorshow.jl index 20bdee1de6ec0..7225a024f529e 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -850,7 +850,10 @@ function _simplify_include_frames(trace) for i in length(trace):-1:1 frame::StackFrame, _ = trace[i] mod = parentmodule(frame) - if first_ignored === nothing + if mod === Base && frame.func === :IncludeInto || + mod === Core && frame.func === :EvalInto + kept_frames[i] = false + elseif first_ignored === nothing if mod === Base && frame.func === :_include # Hide include() machinery by default first_ignored = i diff --git a/base/essentials.jl b/base/essentials.jl index 0e7be924c908c..750ee0f9c434c 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -1250,6 +1250,53 @@ function isiterable(T)::Bool return hasmethod(iterate, Tuple{T}) end +""" + @world(sym, world) + +Resolve the binding `sym` in world `world`. See [`invoke_in_world`](@ref) for running +arbitrary code in fixed worlds. `world` may be a `UnitRange`, in which case the macro +will error unless the binding is valid and has the same value across the entire world +range. + +The `@world` macro is primarily used in the printing of bindings that are no longer +available in the current world. + +## Example +``` +julia> struct Foo; a::Int; end +Foo + +julia> fold = Foo(1) + +julia> Int(Base.get_world_counter()) +26866 + +julia> struct Foo; a::Int; b::Int end +Foo + +julia> fold +@world(Foo, 26866)(1) +``` + +!!! compat "Julia 1.12" + This functionality requires at least Julia 1.12.
+""" +macro world(sym, world) + if isa(sym, Symbol) + return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(GlobalRef(__module__, sym))))) + elseif isa(sym, GlobalRef) + return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(sym)))) + elseif isa(sym, Expr) && sym.head === :(.) && + length(sym.args) == 2 && isa(sym.args[2], QuoteNode) && isa(sym.args[2].value, Symbol) + return :($(_resolve_in_world)($(esc(world)), $(GlobalRef)($(esc(sym.args[1])), $(sym.args[2])))) + else + error("`@world` requires a symbol or GlobalRef") + end +end + +_resolve_in_world(world::Integer, gr::GlobalRef) = + invoke_in_world(UInt(world), Core.getglobal, gr.mod, gr.name) + # Special constprop heuristics for various binary opes typename(typeof(function + end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC typename(typeof(function - end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC diff --git a/base/genericmemory.jl b/base/genericmemory.jl index 89861444d9652..de1fc668333f5 100644 --- a/base/genericmemory.jl +++ b/base/genericmemory.jl @@ -118,7 +118,17 @@ function unsafe_copyto!(dest::MemoryRef{T}, src::MemoryRef{T}, n) where {T} @_terminates_globally_notaskstate_meta n == 0 && return dest @boundscheck memoryref(dest, n), memoryref(src, n) - ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n)) + if isbitstype(T) + tdest = @_gc_preserve_begin dest + tsrc = @_gc_preserve_begin src + pdest = unsafe_convert(Ptr{Cvoid}, dest) + psrc = unsafe_convert(Ptr{Cvoid}, src) + memmove(pdest, psrc, aligned_sizeof(T) * n) + @_gc_preserve_end tdest + @_gc_preserve_end tsrc + else + ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n)) + end return dest end diff --git a/base/irrationals.jl b/base/irrationals.jl index c51b66045723f..76222997865c0 100644 --- a/base/irrationals.jl +++ b/base/irrationals.jl @@ -45,7 +45,16 
@@ promote_rule(::Type{<:AbstractIrrational}, ::Type{Float16}) = Float16 promote_rule(::Type{<:AbstractIrrational}, ::Type{Float32}) = Float32 promote_rule(::Type{<:AbstractIrrational}, ::Type{<:AbstractIrrational}) = Float64 promote_rule(::Type{<:AbstractIrrational}, ::Type{T}) where {T<:Real} = promote_type(Float64, T) -promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number} = promote_type(promote_type(S, real(T)), T) + +function promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number} + U = promote_type(S, real(T)) + if S <: U + # prevent infinite recursion + promote_type(Float64, T) + else + promote_type(U, T) + end +end AbstractFloat(x::AbstractIrrational) = Float64(x)::Float64 Float16(x::AbstractIrrational) = Float16(Float32(x)::Float32) diff --git a/base/loading.jl b/base/loading.jl index b396c7897c1fd..69bb332193519 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -974,14 +974,14 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St entry = entry::Dict{String, Any} uuid = get(entry, "uuid", nothing)::Union{String, Nothing} uuid === nothing && continue + # deps is either a list of names (deps = ["DepA", "DepB"]) or + # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} + deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if UUID(uuid) === where.uuid found_where = true - # deps is either a list of names (deps = ["DepA", "DepB"]) or - # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} - deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if deps isa Vector{String} found_name = name in deps - break + found_name && @goto done elseif deps isa Dict{String, Any} deps = deps::Dict{String, Any} for (dep, uuid) in deps @@ -1000,23 +1000,25 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St return PkgId(UUID(uuid), name) end exts = 
extensions[where.name]::Union{String, Vector{String}} + weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if (exts isa String && name == exts) || (exts isa Vector{String} && name in exts) - weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} - if weakdeps !== nothing - if weakdeps isa Vector{String} - found_name = name in weakdeps - break - elseif weakdeps isa Dict{String, Any} - weakdeps = weakdeps::Dict{String, Any} - for (dep, uuid) in weakdeps - uuid::String - if dep === name - return PkgId(UUID(uuid), name) + for deps′ in [weakdeps, deps] + if deps′ !== nothing + if deps′ isa Vector{String} + found_name = name in deps′ + found_name && @goto done + elseif deps′ isa Dict{String, Any} + deps′ = deps′::Dict{String, Any} + for (dep, uuid) in deps′ + uuid::String + if dep === name + return PkgId(UUID(uuid), name) + end + end end end end end - end # `name` is not an ext, do standard lookup as if this was the parent return identify_package(PkgId(UUID(uuid), dep_name), name) end @@ -1024,6 +1026,7 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St end end end + @label done found_where || return nothing found_name || return PkgId(name) # Only reach here if deps was not a dict which mean we have a unique name for the dep @@ -1247,7 +1250,7 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No else io = open(path, "r") try - iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") + iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.") _, (includes, _, _), _, _, _, _, _, _ = parse_cache_header(io, path) ignore_native = pkg_tracked(includes) finally @@ -1554,7 +1557,7 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any} # TODO: Better error message if this lookup fails? 
uuid_trigger = UUID(totaldeps[trigger]::String) trigger_id = PkgId(uuid_trigger, trigger) - if !haskey(explicit_loaded_modules, trigger_id) || haskey(package_locks, trigger_id) + if !haskey(Base.loaded_modules, trigger_id) || haskey(package_locks, trigger_id) trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, trigger_id) push!(trigger1, gid) else @@ -1564,7 +1567,6 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any} end end -precompiling_package::Bool = false loading_extension::Bool = false precompiling_extension::Bool = false function run_extension_callbacks(extid::ExtensionId) @@ -1683,6 +1685,8 @@ function CacheFlags(cf::CacheFlags=CacheFlags(ccall(:jl_cache_flags, UInt8, ())) opt_level === nothing ? cf.opt_level : opt_level ) end +# reflecting jloptions.c defaults +const DefaultCacheFlags = CacheFlags(use_pkgimages=true, debug_level=isdebugbuild() ? 2 : 1, check_bounds=0, inline=true, opt_level=2) function _cacheflag_to_uint8(cf::CacheFlags)::UInt8 f = UInt8(0) @@ -1694,12 +1698,29 @@ function _cacheflag_to_uint8(cf::CacheFlags)::UInt8 return f end +function translate_cache_flags(cacheflags::CacheFlags, defaultflags::CacheFlags) + opts = String[] + cacheflags.use_pkgimages != defaultflags.use_pkgimages && push!(opts, cacheflags.use_pkgimages ? "--pkgimages=yes" : "--pkgimages=no") + cacheflags.debug_level != defaultflags.debug_level && push!(opts, "-g$(cacheflags.debug_level)") + cacheflags.check_bounds != defaultflags.check_bounds && push!(opts, ("--check-bounds=auto", "--check-bounds=yes", "--check-bounds=no")[cacheflags.check_bounds + 1]) + cacheflags.inline != defaultflags.inline && push!(opts, cacheflags.inline ? 
"--inline=yes" : "--inline=no") + cacheflags.opt_level != defaultflags.opt_level && push!(opts, "-O$(cacheflags.opt_level)") + return opts +end + function show(io::IO, cf::CacheFlags) - print(io, "use_pkgimages = ", cf.use_pkgimages) - print(io, ", debug_level = ", cf.debug_level) - print(io, ", check_bounds = ", cf.check_bounds) - print(io, ", inline = ", cf.inline) - print(io, ", opt_level = ", cf.opt_level) + print(io, "CacheFlags(") + print(io, "; use_pkgimages=") + print(io, cf.use_pkgimages) + print(io, ", debug_level=") + print(io, cf.debug_level) + print(io, ", check_bounds=") + print(io, cf.check_bounds) + print(io, ", inline=") + print(io, cf.inline) + print(io, ", opt_level=") + print(io, cf.opt_level) + print(io, ")") end struct ImageTarget @@ -1868,7 +1889,7 @@ function isrelocatable(pkg::PkgId) isnothing(path) && return false io = open(path, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) _, (includes, includes_srcfiles, _), _... 
= _parse_cache_header(io, path) for inc in includes !startswith(inc.filename, "@depot") && return false @@ -1943,7 +1964,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union io = open(path, "r") ignore_native = false try - iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") + iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.") _, (includes, _, _), depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path) ignore_native = pkg_tracked(includes) @@ -2093,6 +2114,7 @@ debug_loading_deadlocks::Bool = true # Enable a slightly more expensive, but mor function start_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool) # handle recursive and concurrent calls to require assert_havelock(require_lock) + require_lock.reentrancy_cnt == 1 || throw(ConcurrencyViolationError("recursive call to start_loading")) while true loaded = stalecheck ? maybe_root_module(modkey) : nothing loaded isa Module && return loaded @@ -2268,11 +2290,6 @@ For more details regarding code loading, see the manual sections on [modules](@r [parallel computing](@ref code-availability). """ function require(into::Module, mod::Symbol) - if into === Base.__toplevel__ && precompiling_package - # this error type needs to match the error type compilecache throws for non-125 errors. - error("`using/import $mod` outside of a Module detected. Importing a package outside of a module \ - is not allowed during package precompilation.") - end if _require_world_age[] != typemax(UInt) Base.invoke_in_world(_require_world_age[], __require, into, mod) else @@ -2281,6 +2298,10 @@ function require(into::Module, mod::Symbol) end function __require(into::Module, mod::Symbol) + if into === Base.__toplevel__ && generating_output(#=incremental=#true) + error("`using/import $mod` outside of a Module detected. 
Importing a package outside of a module \ + is not allowed during package precompilation.") + end @lock require_lock begin LOADING_CACHE[] = LoadingCache() try @@ -2410,9 +2431,8 @@ function __require_prelocked(uuidkey::PkgId, env=nothing) insert_extension_triggers(uuidkey) # After successfully loading, notify downstream consumers run_package_callbacks(uuidkey) - elseif !haskey(explicit_loaded_modules, uuidkey) - explicit_loaded_modules[uuidkey] = m - run_package_callbacks(uuidkey) + else + newm = root_module(uuidkey) end return m end @@ -2425,7 +2445,6 @@ end PkgOrigin() = PkgOrigin(nothing, nothing, nothing) const pkgorigins = Dict{PkgId,PkgOrigin}() -const explicit_loaded_modules = Dict{PkgId,Module}() # Emptied on Julia start const loaded_modules = Dict{PkgId,Module}() # available to be explicitly loaded const loaded_precompiles = Dict{PkgId,Vector{Module}}() # extended (complete) list of modules, available to be loaded const loaded_modules_order = Vector{Module}() @@ -2465,7 +2484,6 @@ end end maybe_loaded_precompile(key, module_build_id(m)) === nothing && push!(loaded_modules_order, m) loaded_modules[key] = m - explicit_loaded_modules[key] = m module_keys[m] = key end nothing @@ -2497,9 +2515,6 @@ loaded_modules_array() = @lock require_lock copy(loaded_modules_order) # after unreference_module, a subsequent require call will try to load a new copy of it, if stale # reload(m) = (unreference_module(m); require(m)) function unreference_module(key::PkgId) - if haskey(explicit_loaded_modules, key) - m = pop!(explicit_loaded_modules, key) - end if haskey(loaded_modules, key) m = pop!(loaded_modules, key) # need to ensure all modules are GC rooted; will still be referenced @@ -2689,6 +2704,10 @@ end [2] https://github.com/JuliaLang/StyledStrings.jl/issues/91#issuecomment-2379602914 """ function require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}=nothing) + if generating_output(#=incremental=#true) + # Otherwise this would lead to awkward dependency 
issues by loading a package that isn't in the Project/Manifest + error("This interactive function requires a stdlib to be loaded, and package code should instead use it directly from that stdlib.") + end @lock require_lock begin # the PkgId of the ext, or package if not an ext this_uuidkey = ext isa String ? PkgId(uuid5(package_uuidkey.uuid, ext), ext) : package_uuidkey @@ -2871,12 +2890,12 @@ julia> rm("testfile.jl") ``` """ function evalfile(path::AbstractString, args::Vector{String}=String[]) - return Core.eval(Module(:__anon__), + m = Module(:__anon__) + return Core.eval(m, Expr(:toplevel, :(const ARGS = $args), - :(eval(x) = $(Expr(:core, :eval))(__anon__, x)), - :(include(x::AbstractString) = $(Expr(:top, :include))(__anon__, x)), - :(include(mapexpr::Function, x::AbstractString) = $(Expr(:top, :include))(mapexpr, __anon__, x)), + :(const include = $(Base.IncludeInto(m))), + :(const eval = $(Core.EvalInto(m))), :(include($path)))) end evalfile(path::AbstractString, args::Vector) = evalfile(path, String[args...]) @@ -2952,7 +2971,8 @@ end const PRECOMPILE_TRACE_COMPILE = Ref{String}() function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String}, - concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout, isext::Bool=false) + concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(), + internal_stderr::IO = stderr, internal_stdout::IO = stdout, isext::Bool=false) @nospecialize internal_stderr internal_stdout rm(output, force=true) # Remove file if it exists output_o === nothing || rm(output_o, force=true) @@ -2995,24 +3015,29 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o:: deps = deps_eltype * "[" * join(deps_strs, ",") * "]" precomp_stack = "Base.PkgId[$(join(map(pkg_str, vcat(Base.precompilation_stack, pkg)), ", "))]" + if output_o === nothing + # remove options that make no 
difference given the other cache options + cacheflags = CacheFlags(cacheflags, opt_level=0) + end + opts = translate_cache_flags(cacheflags, CacheFlags()) # julia_cmd is generated for the running system, and must be fixed if running for precompile instead if output_o !== nothing @debug "Generating object cache file for $(repr("text/plain", pkg))" cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) - opts = `--output-o $(output_o) --output-ji $(output) --output-incremental=yes` + push!(opts, "--output-o", output_o) else @debug "Generating cache file for $(repr("text/plain", pkg))" cpu_target = nothing - opts = `-O0 --output-ji $(output) --output-incremental=yes` end + push!(opts, "--output-ji", output) + isassigned(PRECOMPILE_TRACE_COMPILE) && push!(opts, "--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])") - trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[]) --trace-compile-timing` : `` io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd) - $(flags) - $(opts) - --startup-file=no --history-file=no --warn-overwrite=yes - --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no") - $trace - -`, + $(flags) + $(opts) + --output-incremental=yes + --startup-file=no --history-file=no --warn-overwrite=yes + $(have_color === nothing ? "--color=auto" : have_color ? 
"--color=yes" : "--color=no") + -`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1), stderr = internal_stderr, stdout = internal_stdout), @@ -3022,7 +3047,6 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o:: empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated Base.track_nested_precomp($precomp_stack) Base.precompiling_extension = $(loading_extension | isext) - Base.precompiling_package = true Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)), $(repr(load_path)), $deps, $(repr(source_path(nothing)))) """) @@ -3099,7 +3123,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # build up the list of modules that we want the precompile process to preserve if keep_loaded_modules concrete_deps = copy(_concrete_dependencies) - for (pkgreq, modreq) in loaded_modules # TODO: convert all relevant staleness heuristics to use explicit_loaded_modules instead + for (pkgreq, modreq) in loaded_modules if !(pkgreq === Main || pkgreq === Core || pkgreq === Base) push!(concrete_deps, pkgreq => module_build_id(modreq)) end @@ -3130,7 +3154,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in close(tmpio_o) close(tmpio_so) end - p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout, isext) + p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, cacheflags, internal_stderr, internal_stdout, isext) if success(p) if cache_objects @@ -3153,7 +3177,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # append extra crc to the end of the .ji file: open(tmppath, "r+") do f if iszero(isvalid_cache_header(f)) - error("Invalid header for $(repr("text/plain", pkg)) in new cache file $(repr(tmppath)).") + error("Incompatible header for $(repr("text/plain", pkg)) in new cache file 
$(repr(tmppath)).") end seekend(f) write(f, crc_so) @@ -3477,7 +3501,7 @@ end function parse_cache_header(cachefile::String) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) ret = parse_cache_header(io, cachefile) return ret finally @@ -3490,7 +3514,7 @@ function preferences_hash(cachefile::String) io = open(cachefile, "r") try if iszero(isvalid_cache_header(io)) - throw(ArgumentError("Invalid header in cache file $cachefile.")) + throw(ArgumentError("Incompatible header in cache file $cachefile.")) end return preferences_hash(io, cachefile) finally @@ -3506,7 +3530,7 @@ end function cache_dependencies(cachefile::String) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) return cache_dependencies(io, cachefile) finally close(io) @@ -3546,7 +3570,7 @@ end function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) return read_dependency_src(io, cachefile, filename) finally close(io) @@ -3830,9 +3854,9 @@ end try checksum = isvalid_cache_header(io) if iszero(checksum) - @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" - record_reason(reasons, "invalid header") - return true # invalid cache file + @debug "Rejecting cache file $cachefile due to it containing an incompatible cache header" + record_reason(reasons, "incompatible header") + return true # incompatible cache file end modules, (includes, _, 
requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, actual_flags = parse_cache_header(io, cachefile) if isempty(modules) @@ -4135,5 +4159,5 @@ end precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing)) || @assert false precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) || @assert false -precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), Cmd, IO, IO)) || @assert false -precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), Cmd, IO, IO)) || @assert false +precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false +precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false diff --git a/base/precompilation.jl b/base/precompilation.jl index a39563178632f..f597acef9b57f 100644 --- a/base/precompilation.jl +++ b/base/precompilation.jl @@ -362,7 +362,7 @@ function printpkgstyle(io, header, msg; color=:green) end const Config = Pair{Cmd, Base.CacheFlags} -const PkgConfig = Tuple{Base.PkgId,Config} +const PkgConfig = Tuple{PkgId,Config} function precompilepkgs(pkgs::Vector{String}=String[]; internal_call::Bool=false, @@ -375,8 +375,22 @@ function precompilepkgs(pkgs::Vector{String}=String[]; # asking for timing disables fancy mode, as timing is shown in non-fancy mode fancyprint::Bool = can_fancyprint(io) && !timing, manifest::Bool=false,) + # monomorphize this to avoid latency problems + _precompilepkgs(pkgs, internal_call, strict, warn_loaded, timing, _from_loading, + configs isa Vector{Config} ? configs : [configs], + IOContext{IO}(io), fancyprint, manifest) +end - configs = configs isa Config ? 
[configs] : configs +function _precompilepkgs(pkgs::Vector{String}, + internal_call::Bool, + strict::Bool, + warn_loaded::Bool, + timing::Bool, + _from_loading::Bool, + configs::Vector{Config}, + io::IOContext{IO}, + fancyprint::Bool, + manifest::Bool) requested_pkgs = copy(pkgs) # for understanding user intent time_start = time_ns() @@ -393,17 +407,32 @@ function precompilepkgs(pkgs::Vector{String}=String[]; if _from_loading && !Sys.isinteractive() && Base.get_bool_env("JULIA_TESTS", false) # suppress passive loading printing in julia test suite. `JULIA_TESTS` is set in Base.runtests - io = devnull + io = IOContext{IO}(devnull) end + nconfigs = length(configs) hascolor = get(io, :color, false)::Bool color_string(cstr::String, col::Union{Int64, Symbol}) = _color_string(cstr, col, hascolor) stale_cache = Dict{StaleCacheKey, Bool}() - exts = Dict{Base.PkgId, String}() # ext -> parent + exts = Dict{PkgId, String}() # ext -> parent # make a flat map of each dep and its direct deps - depsmap = Dict{Base.PkgId, Vector{Base.PkgId}}() - pkg_exts_map = Dict{Base.PkgId, Vector{Base.PkgId}}() + depsmap = Dict{PkgId, Vector{PkgId}}() + pkg_exts_map = Dict{PkgId, Vector{PkgId}}() + + function describe_pkg(pkg::PkgId, is_direct_dep::Bool, flags::Cmd, cacheflags::Base.CacheFlags) + name = haskey(exts, pkg) ? string(exts[pkg], " → ", pkg.name) : pkg.name + name = is_direct_dep ? 
name : color_string(name, :light_black) + if nconfigs > 1 && !isempty(flags) + config_str = join(flags, " ") + name *= color_string(" `$config_str`", :light_black) + end + if nconfigs > 1 + config_str = join(Base.translate_cache_flags(cacheflags, Base.DefaultCacheFlags), " ") + name *= color_string(" $config_str", :light_black) + end + return name + end for (dep, deps) in env.deps pkg = Base.PkgId(dep, env.names[dep]) @@ -569,7 +598,6 @@ function precompilepkgs(pkgs::Vector{String}=String[]; end end - nconfigs = length(configs) target = nothing if nconfigs == 1 if !isempty(only(configs)[1]) @@ -584,7 +612,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; failed_deps = Dict{PkgConfig, String}() precomperr_deps = PkgConfig[] # packages that may succeed after a restart (i.e. loaded packages with no cache file) - print_lock = io isa Base.LibuvStream ? io.lock::ReentrantLock : ReentrantLock() + print_lock = io.io isa Base.LibuvStream ? io.io.lock::ReentrantLock : ReentrantLock() first_started = Base.Event() printloop_should_exit::Bool = !fancyprint # exit print loop immediately if not fancy printing interrupted_or_done = Base.Event() @@ -677,7 +705,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; n_print_rows = 0 while !printloop_should_exit lock(print_lock) do - term_size = Base.displaysize_(io) + term_size = displaysize(io) num_deps_show = max(term_size[1] - 3, 2) # show at least 2 deps pkg_queue_show = if !interrupted_or_done.set && length(pkg_queue) > num_deps_show last(pkg_queue, num_deps_show) @@ -692,7 +720,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; bar.max = n_total - n_already_precomp # when sizing to the terminal width subtract a little to give some tolerance to resizing the # window between print cycles - termwidth = Base.displaysize_(io)[2] - 4 + termwidth = displaysize(io)[2] - 4 if !final_loop str = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io) print(iostr, 
Base._truncate_at_width_or_chars(true, str, termwidth), "\n") @@ -700,12 +728,8 @@ function precompilepkgs(pkgs::Vector{String}=String[]; for pkg_config in pkg_queue_show dep, config = pkg_config loaded = warn_loaded && haskey(Base.loaded_modules, dep) - _name = haskey(exts, dep) ? string(exts[dep], " → ", dep.name) : dep.name - name = dep in direct_deps ? _name : string(color_string(_name, :light_black)) - if nconfigs > 1 && !isempty(config[1]) - config_str = "$(join(config[1], " "))" - name *= color_string(" $(config_str)", :light_black) - end + flags, cacheflags = config + name = describe_pkg(dep, dep in direct_deps, flags, cacheflags) line = if pkg_config in precomperr_deps string(color_string(" ? ", Base.warn_color()), name) elseif haskey(failed_deps, pkg_config) @@ -793,15 +817,10 @@ function precompilepkgs(pkgs::Vector{String}=String[]; std_pipe = Base.link_pipe!(Pipe(); reader_supports_async=true, writer_supports_async=true) t_monitor = @async monitor_std(pkg_config, std_pipe; single_requested_pkg) - _name = haskey(exts, pkg) ? string(exts[pkg], " → ", pkg.name) : pkg.name - name = is_direct_dep ? 
_name : string(color_string(_name, :light_black)) - if nconfigs > 1 && !isempty(flags) - config_str = "$(join(flags, " "))" - name *= color_string(" $(config_str)", :light_black) - end + name = describe_pkg(pkg, is_direct_dep, flags, cacheflags) lock(print_lock) do - if !fancyprint && target === nothing && isempty(pkg_queue) - printpkgstyle(io, :Precompiling, "packages...") + if !fancyprint && isempty(pkg_queue) + printpkgstyle(io, :Precompiling, something(target, "packages...")) end end push!(pkg_queue, pkg_config) diff --git a/base/range.jl b/base/range.jl index 4b5d076dcf436..cee15db39b911 100644 --- a/base/range.jl +++ b/base/range.jl @@ -1680,3 +1680,14 @@ function show(io::IO, r::LogRange{T}) where {T} show(io, length(r)) print(io, ')') end + +# Implementation detail of @world +# The rest of this is defined in essentials.jl, but UnitRange is not available +function _resolve_in_world(worlds::UnitRange, gr::GlobalRef) + # Validate that this binding's reference covers the entire world range + bpart = lookup_binding_partition(UInt(first(worlds)), gr) + if bpart.max_world < last(worlds) + error("Binding does not cover the full world range") + end + _resolve_in_world(UInt(last(worlds)), gr) +end diff --git a/base/runtime_internals.jl b/base/runtime_internals.jl index 645aa55c538b4..ab867f8fcae6d 100644 --- a/base/runtime_internals.jl +++ b/base/runtime_internals.jl @@ -218,9 +218,10 @@ function _fieldnames(@nospecialize t) return t.name.names end -const BINDING_KIND_GLOBAL = 0x0 -const BINDING_KIND_CONST = 0x1 -const BINDING_KIND_CONST_IMPORT = 0x2 +# N.B.: Needs to be synced with julia.h +const BINDING_KIND_CONST = 0x0 +const BINDING_KIND_CONST_IMPORT = 0x1 +const BINDING_KIND_GLOBAL = 0x2 const BINDING_KIND_IMPLICIT = 0x3 const BINDING_KIND_EXPLICIT = 0x4 const BINDING_KIND_IMPORTED = 0x5 @@ -228,6 +229,8 @@ const BINDING_KIND_FAILED = 0x6 const BINDING_KIND_DECLARED = 0x7 const BINDING_KIND_GUARD = 0x8 +is_some_const_binding(kind::UInt8) = (kind == 
BINDING_KIND_CONST || kind == BINDING_KIND_CONST_IMPORT) + function lookup_binding_partition(world::UInt, b::Core.Binding) ccall(:jl_get_binding_partition, Ref{Core.BindingPartition}, (Any, UInt), b, world) end @@ -236,9 +239,27 @@ function lookup_binding_partition(world::UInt, gr::Core.GlobalRef) ccall(:jl_get_globalref_partition, Ref{Core.BindingPartition}, (Any, UInt), gr, world) end +partition_restriction(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_restriction_value, Any, (Any,), bpart) + binding_kind(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_kind, UInt8, (Any,), bpart) binding_kind(m::Module, s::Symbol) = binding_kind(lookup_binding_partition(tls_world_age(), GlobalRef(m, s))) +""" + delete_binding(mod::Module, sym::Symbol) + +Force the binding `mod.sym` to be undefined again, allowing it to be redefined. +Note that this operation is very expensive, requiring a full scan of all code in the system, +as well as potential recompilation of any methods that (may) have used binding +information. + +!!! warning + The implementation of this functionality is currently incomplete. Do not use + this method on versions that contain this disclaimer except for testing. 
+""" +function delete_binding(mod::Module, sym::Symbol) + ccall(:jl_disable_binding, Cvoid, (Any,), GlobalRef(mod, sym)) +end + """ fieldname(x::DataType, i::Integer) diff --git a/base/show.jl b/base/show.jl index fb932838ac69a..ee467ae90ff50 100644 --- a/base/show.jl +++ b/base/show.jl @@ -324,8 +324,11 @@ end convert(::Type{IOContext}, io::IOContext) = io convert(::Type{IOContext}, io::IO) = IOContext(io, ioproperties(io))::IOContext +convert(::Type{IOContext{IO_t}}, io::IOContext{IO_t}) where {IO_t} = io +convert(::Type{IOContext{IO_t}}, io::IO) where {IO_t} = IOContext{IO_t}(io, ioproperties(io))::IOContext{IO_t} IOContext(io::IO) = convert(IOContext, io) +IOContext{IO_t}(io::IO) where {IO_t} = convert(IOContext{IO_t}, io) function IOContext(io::IO, KV::Pair) d = ioproperties(io) @@ -427,7 +430,7 @@ get(io::IO, key, default) = default keys(io::IOContext) = keys(io.dict) keys(io::IO) = keys(ImmutableDict{Symbol,Any}()) -displaysize(io::IOContext) = haskey(io, :displaysize) ? io[:displaysize]::Tuple{Int,Int} : Base.displaysize_(io.io) +displaysize(io::IOContext) = haskey(io, :displaysize) ? 
io[:displaysize]::Tuple{Int,Int} : displaysize(io.io) show_circular(io::IO, @nospecialize(x)) = false function show_circular(io::IOContext, @nospecialize(x)) @@ -1032,6 +1035,21 @@ function is_global_function(tn::Core.TypeName, globname::Union{Symbol,Nothing}) return false end +function check_world_bounded(tn::Core.TypeName) + bnd = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), tn.module, tn.name, true) + isdefined(bnd, :partitions) || return nothing + partition = @atomic bnd.partitions + while true + if is_some_const_binding(binding_kind(partition)) && partition_restriction(partition) <: tn.wrapper + max_world = @atomic partition.max_world + max_world == typemax(UInt) && return nothing + return Int(partition.min_world):Int(max_world) + end + isdefined(partition, :next) || return nothing + partition = @atomic partition.next + end +end + function show_type_name(io::IO, tn::Core.TypeName) if tn === UnionAll.name # by coincidence, `typeof(Type)` is a valid representation of the UnionAll type. @@ -1043,6 +1061,8 @@ function show_type_name(io::IO, tn::Core.TypeName) sym = (globfunc ? globname : tn.name)::Symbol globfunc && print(io, "typeof(") quo = false + world = check_world_bounded(tn) + world !== nothing && print(io, "@world(") if !(get(io, :compact, false)::Bool) # Print module prefix unless type is visible from module passed to # IOContext If :module is not set, default to Main. 
@@ -1061,6 +1081,7 @@ function show_type_name(io::IO, tn::Core.TypeName) end end show_sym(io, sym) + world !== nothing && print(io, ", ", world, ")") quo && print(io, ")") globfunc && print(io, ")") nothing @@ -3338,3 +3359,81 @@ end function show(io::IO, ::MIME"text/plain", oc::Core.OpaqueClosure{A, R}) where {A, R} show(io, oc) end + +# printing bindings and partitions +function print_partition(io::IO, partition::Core.BindingPartition) + print(io, partition.min_world) + print(io, ":") + max_world = @atomic partition.max_world + if max_world == typemax(UInt) + print(io, '∞') + else + print(io, max_world) + end + print(io, " - ") + kind = binding_kind(partition) + if is_some_const_binding(kind) + print(io, "constant binding to ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_GUARD + print(io, "undefined binding - guard entry") + elseif kind == BINDING_KIND_FAILED + print(io, "ambiguous binding - guard entry") + elseif kind == BINDING_KIND_DECLARED + print(io, "undefined, but declared using `global` - guard entry") + elseif kind == BINDING_KIND_IMPLICIT + print(io, "implicit `using` from ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_EXPLICIT + print(io, "explicit `using` from ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_IMPORTED + print(io, "explicit `import` from ") + print(io, partition_restriction(partition)) + else + @assert kind == BINDING_KIND_GLOBAL + print(io, "global variable with type ") + print(io, partition_restriction(partition)) + end +end + +function show(io::IO, ::MIME"text/plain", partition::Core.BindingPartition) + print(io, "BindingPartition ") + print_partition(io, partition) +end + +function show(io::IO, ::MIME"text/plain", bnd::Core.Binding) + print(io, "Binding ") + print(io, bnd.globalref) + if !isdefined(bnd, :partitions) + print(io, "No partitions") + else + partition = @atomic bnd.partitions + while true + println(io) + print(io, " ") + 
print_partition(io, partition) + isdefined(partition, :next) || break + partition = @atomic partition.next + end + end +end + +# Special pretty printing for EvalInto/IncludeInto +function show(io::IO, ii::IncludeInto) + if getglobal(ii.m, :include) === ii + print(io, ii.m) + print(io, ".include") + else + show_default(io, ii) + end +end + +function show(io::IO, ei::Core.EvalInto) + if getglobal(ei.m, :eval) === ei + print(io, ei.m) + print(io, ".eval") + else + show_default(io, ei) + end +end diff --git a/base/stream.jl b/base/stream.jl index 3ca5717be29db..2f00538ad0e96 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -941,6 +941,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) if bytesavailable(sbuf) >= nb nread = readbytes!(sbuf, a, nb) else + initsize = length(a) newbuf = PipeBuffer(a, maxsize=nb) newbuf.size = newbuf.offset # reset the write pointer to the beginning nread = try @@ -951,7 +952,8 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) finally s.buffer = sbuf end - compact(newbuf) + _take!(a, _unsafe_take!(newbuf)) + length(a) >= initsize || resize!(a, initsize) end iolock_end() return nread diff --git a/base/sysimg.jl b/base/sysimg.jl index 966ed76751f28..ccc8ef38e81bc 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -32,11 +32,7 @@ Use [`Base.include`](@ref) to evaluate a file into another module. !!! compat "Julia 1.5" Julia 1.5 is required for passing the `mapexpr` argument. """ -include(mapexpr::Function, fname::AbstractString) = Base._include(mapexpr, Main, fname) -function include(fname::AbstractString) - isa(fname, String) || (fname = Base.convert(String, fname)::String) - Base._include(identity, Main, fname) -end +const include = Base.IncludeInto(Main) """ eval(expr) @@ -45,7 +41,7 @@ Evaluate an expression in the global scope of the containing module. Every `Module` (except those defined with `baremodule`) has its own 1-argument definition of `eval`, which evaluates expressions in that module. 
""" -eval(x) = Core.eval(Main, x) +const eval = Core.EvalInto(Main) # Ensure this file is also tracked pushfirst!(Base._included_files, (@__MODULE__, abspath(@__FILE__))) diff --git a/base/timing.jl b/base/timing.jl index 4880951f0a32d..1de3727756829 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -628,3 +628,38 @@ macro timed(ex) ) end end + +# Exported, documented, and tested in InteractiveUtils +# here so it's possible to time/trace all imports, including InteractiveUtils and its deps +macro time_imports(ex) + quote + try + Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1) + $(esc(ex)) + finally + Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1) + end + end +end + +macro trace_compile(ex) + quote + try + ccall(:jl_force_trace_compile_timing_enable, Cvoid, ()) + $(esc(ex)) + finally + ccall(:jl_force_trace_compile_timing_disable, Cvoid, ()) + end + end +end + +macro trace_dispatch(ex) + quote + try + ccall(:jl_force_trace_dispatch_enable, Cvoid, ()) + $(esc(ex)) + finally + ccall(:jl_force_trace_dispatch_disable, Cvoid, ()) + end + end +end diff --git a/base/util.jl b/base/util.jl index 95d62c4a16e1d..3ce64e50f7e29 100644 --- a/base/util.jl +++ b/base/util.jl @@ -249,7 +249,7 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio end function julia_exename() - if !Base.isdebugbuild() + if !isdebugbuild() return @static Sys.iswindows() ? "julia.exe" : "julia" else return @static Sys.iswindows() ? 
"julia-debug.exe" : "julia-debug" @@ -530,7 +530,6 @@ function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) end _crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc) _crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc) -_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) _crc32c(x::UInt128, crc::UInt32=0x00000000) = ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, x, 16) _crc32c(x::UInt64, crc::UInt32=0x00000000) = diff --git a/base/uuid.jl b/base/uuid.jl index 9b2da3c6409db..56f3a6aa417e7 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -36,6 +36,8 @@ let Base.hash(uuid::UUID, h::UInt) = hash(uuid_hash_seed, hash(convert(NTuple{2, UInt64}, uuid), h)) end +_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) + let @inline function uuid_kernel(s, i, u) _c = UInt32(@inbounds codeunit(s, i)) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 04d13011d6223..037e8926d5003 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -183,10 +183,10 @@ for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter()) # interactive startup uses this write(IOBuffer(), "") - # not critical, but helps hide unrelated compilation from @time when using --trace-compile - foo() = rand(2,2) * rand(2,2) - @time foo() - @time foo() + # Not critical, but helps hide unrelated compilation from @time when using --trace-compile. 
+ f55729() = Base.Experimental.@force_compile + @time @eval f55729() + @time @eval f55729() break # only actually need to do this once end diff --git a/contrib/juliac-buildscript.jl b/contrib/juliac-buildscript.jl index 50f96198c416b..0303e95f448b5 100644 --- a/contrib/juliac-buildscript.jl +++ b/contrib/juliac-buildscript.jl @@ -17,7 +17,6 @@ task.rngState3 = 0x3a77f7189200c20b task.rngState4 = 0x5502376d099035ae uuid_tuple = (UInt64(0), UInt64(0)) ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple) -ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) # Patch methods in Core and Base @@ -28,6 +27,7 @@ end (f::Base.RedirectStdStream)(io::Core.CoreSTDOUT) = Base._redirect_io_global(io, f.unix_fd) @eval Base begin + depwarn(msg, funcsym; force::Bool=false) = nothing _assert_tostring(msg) = "" reinit_stdio() = nothing JuliaSyntax.enable_in_core!() = nothing @@ -230,20 +230,15 @@ let loaded = Symbol.(Base.loaded_modules_array()) # TODO better way to do this using Artifacts @eval Artifacts begin function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, _::Val{lazyartifacts}) where lazyartifacts - moduleroot = Base.moduleroot(__module__) - if haskey(Base.module_keys, moduleroot) - # Process overrides for this UUID, if we know what it is - process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid) - end - # If the artifact exists, we're in the happy path and we can immediately # return the path to the artifact: - dirs = artifact_paths(hash; honor_overrides=true) + dirs = artifacts_dirs(bytes2hex(hash.bytes)) for dir in dirs if isdir(dir) return jointail(dir, path_tail) end end + error("Artifact not found") end end end @@ -259,6 +254,18 @@ let loaded = Symbol.(Base.loaded_modules_array()) # TODO better way to do this __init__() = rand() end end + if :Markdown in loaded + using Markdown + @eval Markdown begin + __init__() = rand() + end + end + if 
:JuliaSyntaxHighlighting in loaded + using JuliaSyntaxHighlighting + @eval JuliaSyntaxHighlighting begin + __init__() = rand() + end + end end empty!(Core.ARGS) diff --git a/contrib/juliac.jl b/contrib/juliac.jl index 61e0e91958667..0f008976d2b4f 100644 --- a/contrib/juliac.jl +++ b/contrib/juliac.jl @@ -8,6 +8,7 @@ trim = nothing outname = nothing file = nothing add_ccallables = false +verbose = false help = findfirst(x->x == "--help", ARGS) if help !== nothing @@ -39,6 +40,8 @@ let i = 1 end elseif arg == "--compile-ccallable" global add_ccallables = true + elseif arg == "--verbose" + global verbose = true else if arg[1] == '-' || !isnothing(file) println("Unexpected argument `$arg`") @@ -77,9 +80,8 @@ open(initsrc_path, "w") do io end static_call_graph_arg() = isnothing(trim) ? `` : `--trim=$(trim)` -is_verbose() = verbose ? `--verbose-compilation=yes` : `` cmd = addenv(`$cmd --project=$(Base.active_project()) --output-o $img_path --output-incremental=no --strip-ir --strip-metadata $(static_call_graph_arg()) $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1) - +verbose && println("Running: $cmd") if !success(pipeline(cmd; stdout, stderr)) println(stderr, "\nFailed to compile $file") exit(1) diff --git a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 b/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 deleted file mode 100644 index 137460d1a05a1..0000000000000 --- a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -74d656c054c1406a7e88910d673019f7 diff --git a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 b/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 deleted file mode 100644 index 0b8463176a867..0000000000000 --- a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ 
-a8e589ce68cc14883a7a21f68862695bfaa9ab38dfa0e704c32aaa801667708af0d851a41199ad09ae81a4c0b928befb680d639c1eca3377ce2db2dcc34b98e5 diff --git a/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 new file mode 100644 index 0000000000000..7c0bfbf62bd6e --- /dev/null +++ b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 @@ -0,0 +1 @@ +6fce8506a1701acdcbc4888250eeb86a diff --git a/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 new file mode 100644 index 0000000000000..06e3ea9c8dfa7 --- /dev/null +++ b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 @@ -0,0 +1 @@ +e251745da221a82f3ec5e21a76c29df0b695dc4028ee2c719373c08637050318db7b543c9d40074314fc3495738d39fd8af5a7954e8b72695df44e25e395f883 diff --git a/pkgimage.mk b/pkgimage.mk index 0bc035ee03b08..78b2618be549f 100644 --- a/pkgimage.mk +++ b/pkgimage.mk @@ -25,7 +25,8 @@ print-depot-path: @$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e '@show Base.DEPOT_PATH') $(BUILDDIR)/stdlib/%.image: $(JULIAHOME)/stdlib/Project.toml $(JULIAHOME)/stdlib/Manifest.toml $(INDEPENDENT_STDLIBS_SRCS) $(JULIA_DEPOT_PATH)/compiled - @$(call PRINT_JULIA, JULIA_CPU_TARGET="$(JULIA_CPU_TARGET)" $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.Precompilation.precompilepkgs(;configs=[``=>Base.CacheFlags(), `--check-bounds=yes`=>Base.CacheFlags(;check_bounds=1)])') + @$(call PRINT_JULIA, JULIA_CPU_TARGET="$(JULIA_CPU_TARGET)" $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e \ + 'Base.Precompilation.precompilepkgs(configs=[``=>Base.CacheFlags(debug_level=2, opt_level=3), ``=>Base.CacheFlags(check_bounds=1, debug_level=2, opt_level=3)])') touch $@ $(BUILDDIR)/stdlib/release.image: $(build_private_libdir)/sys.$(SHLIB_EXT) diff --git a/src/Makefile b/src/Makefile 
index 75635c2e6c062..3458f51fa5548 100644 --- a/src/Makefile +++ b/src/Makefile @@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h) endif diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 279686c387e1b..a3ffdf1d051a9 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ jl_value_t *ci = cgparams.lookup(mi, world, world); JL_GC_PROMISE_ROOTED(ci); jl_code_instance_t *codeinst = NULL; - JL_GC_PUSH1(&codeinst); if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) { codeinst = (jl_code_instance_t*)ci; } else { if (cgparams.lookup != jl_rettype_inferred_addr) { + // XXX: This will corrupt and leak a lot of memory which may be very bad jl_error("Refusing to automatically run type inference with custom cache lookup."); } else { @@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ * it into the cache here, since it was explicitly requested and is * otherwise not reachable from anywhere in the system image. 
*/ - if (!jl_mi_cache_has_ci(mi, codeinst)) + if (codeinst && !jl_mi_cache_has_ci(mi, codeinst)) { + JL_GC_PUSH1(&codeinst); jl_mi_cache_insert(mi, codeinst); + JL_GC_POP(); + } } } - JL_GC_POP(); return codeinst; } -arraylist_t new_invokes; +typedef DenseMap<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_compiled_functions_t; +static void compile_workqueue(jl_codegen_params_t &params, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions) +{ + decltype(params.workqueue) workqueue; + std::swap(params.workqueue, workqueue); + jl_code_info_t *src = NULL; + jl_code_instance_t *codeinst = NULL; + JL_GC_PUSH2(&src, &codeinst); + assert(!params.cache); + while (!workqueue.empty()) { + auto it = workqueue.pop_back_val(); + codeinst = it.first; + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + { + auto it = compiled_functions.find(codeinst); + if (it == compiled_functions.end()) { + // Reinfer the function. The JIT came along and removed the inferred + // method body. 
See #34993 + if ((policy != CompilationPolicy::Default || params.params->trim) && + jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { + // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary) + codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE); + } + if (codeinst) { + orc::ThreadSafeModule result_m = + jl_create_ts_module(name_from_method_instance(codeinst->def), + params.tsctx, params.DL, params.TargetTriple); + auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params); + if (result_m) + it = compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; + } + } + if (it != compiled_functions.end()) { + auto &decls = it->second.second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + } + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + } + } + } + // patch up the prototype we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invokeName.empty() && params.params->trim) { + errs() << "Bailed out to invoke when compiling:"; + jl_(codeinst->def); + abort(); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + jl_method_instance_t *mi = codeinst->def; + size_t nrealargs = 
jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + // TODO: maybe this can be cached in codeinst->specfptr? + emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); + } + } + if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") + ocinvokeDecl = pinvoke->getName(); + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); + } + } + workqueue.append(params.workqueue); + params.workqueue.clear(); + } + JL_GC_POP(); +} + + // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing 
@@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm orc::ThreadSafeContext ctx; orc::ThreadSafeModule backing; if (!llvmmod) { - ctx = jl_ExecutionEngine->acquireContext(); + ctx = jl_ExecutionEngine->makeContext(); backing = jl_create_ts_module("text", ctx); } orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing; @@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm params.imaging_mode = imaging; params.debug_level = cgparams->debug_info_level; params.external_linkage = _external_linkage; - arraylist_new(&new_invokes, 0); size_t compile_for[] = { jl_typeinf_world, _world }; int worlds = 0; if (jl_options.trim != JL_TRIM_NO) worlds = 1; + jl_compiled_functions_t compiled_functions; for (; worlds < 2; worlds++) { JL_TIMING(NATIVE_AOT, NATIVE_Codegen); size_t this_world = compile_for[worlds]; @@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm continue; } mi = (jl_method_instance_t*)item; -compile_mi: src = NULL; // if this method is generally visible to the current compilation world, // and this is either the primary world, or not applicable in the primary world @@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_(mi); abort(); } - if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) { + if (codeinst && !compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) { // now add it to our compilation results // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) { @@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm Triple(clone.getModuleUnlocked()->getTargetTriple())); 
jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params); if (result_m) - params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; else if (jl_options.trim != JL_TRIM_NO) { // if we're building a small image, we need to compile everything // to ensure that we have all the information we need. @@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } } } - } else if (this_world != jl_typeinf_world) { + } + else if (this_world != jl_typeinf_world) { /* jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n"); jl_(mi); abort(); */ } - // TODO: is goto the best way to do this? - jl_compile_workqueue(params, policy); - mi = (jl_method_instance_t*)arraylist_pop(&new_invokes); - if (mi != NULL) { - goto compile_mi; - } } - - // finally, make sure all referenced methods also get compiled or fixed up - jl_compile_workqueue(params, policy); } JL_GC_POP(); - arraylist_free(&new_invokes); + // finally, make sure all referenced methods also get compiled or fixed up + compile_workqueue(params, policy, compiled_functions); // process the globals array, before jl_merge_module destroys them SmallVector gvars(params.global_targets.size()); @@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm data->jl_value_to_llvm[idx] = global.first; idx++; } - CreateNativeMethods += params.compiled_functions.size(); + CreateNativeMethods += compiled_functions.size(); size_t offset = gvars.size(); data->jl_external_to_llvm.resize(params.external_fns.size()); @@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm { JL_TIMING(NATIVE_AOT, NATIVE_Merge); Linker L(*clone.getModuleUnlocked()); - for (auto &def : params.compiled_functions) { + for (auto &def : compiled_functions) { jl_merge_module(clone, 
std::move(std::get<0>(def.second))); jl_code_instance_t *this_code = def.first; jl_llvm_functions_t decls = std::get<1>(def.second); @@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } ct->reentrant_timing &= ~1ull; } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return (void*)data; } @@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code, } } -void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) -{ - PM->add(new TargetLibraryInfoWrapperPass(triple)); - PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); -} // sometimes in GDB you want to find out what code would be created from a mi extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi) @@ -2037,8 +2135,8 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ dump->F = nullptr; dump->TSM = nullptr; if (src && jl_is_code_info(src)) { - auto ctx = jl_ExecutionEngine->getContext(); - orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx); + auto ctx = jl_ExecutionEngine->makeContext(); + orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx); uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -2046,7 +2144,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ auto target_info = m.withModuleDo([&](Module &M) { return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); }); - jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second)); + jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second)); output.params = ¶ms; output.imaging_mode = imaging_default(); // This would be nice, but currently it causes some assembly 
regressions that make printed output diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp index 8557698a4e513..c257d2a2e3331 100644 --- a/src/cgmemmgr.cpp +++ b/src/cgmemmgr.cpp @@ -32,14 +32,14 @@ namespace { -static size_t get_block_size(size_t size) +static size_t get_block_size(size_t size) JL_NOTSAFEPOINT { return (size > jl_page_size * 256 ? LLT_ALIGN(size, jl_page_size) : jl_page_size * 256); } // Wrapper function to mmap/munmap/mprotect pages... -static void *map_anon_page(size_t size) +static void *map_anon_page(size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size, @@ -54,7 +54,7 @@ static void *map_anon_page(size_t size) return mem; } -static void unmap_page(void *ptr, size_t size) +static void unmap_page(void *ptr, size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ VirtualFree(ptr, size, MEM_DECOMMIT); @@ -71,7 +71,7 @@ enum class Prot : int { NO = PAGE_NOACCESS }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { DWORD old_prot; if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) { @@ -89,7 +89,7 @@ enum class Prot : int { NO = PROT_NONE }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { int ret = mprotect(ptr, size, (int)flags); if (ret != 0) { @@ -98,7 +98,7 @@ static void protect_page(void *ptr, size_t size, Prot flags) } } -static bool check_fd_or_close(int fd) +static bool check_fd_or_close(int fd) JL_NOTSAFEPOINT { if (fd == -1) return false; @@ -129,7 +129,7 @@ static intptr_t anon_hdl = -1; // Also, creating big file mapping and then map pieces of it seems to // consume too much global resources. 
Therefore, we use each file mapping // as a block on windows -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS, 0, 0, size); @@ -137,13 +137,13 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = 0; return 0; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); DWORD file_mode = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; @@ -162,7 +162,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) } #else // _OS_WINDOWS_ // For shared mapped region -static intptr_t get_anon_hdl(void) +static intptr_t get_anon_hdl(void) JL_NOTSAFEPOINT { int fd = -1; @@ -228,7 +228,7 @@ static struct _make_shared_map_lock { }; } shared_map_lock; -static size_t get_map_size_inc() +static size_t get_map_size_inc() JL_NOTSAFEPOINT { rlimit rl; if (getrlimit(RLIMIT_FSIZE, &rl) != -1) { @@ -242,7 +242,7 @@ static size_t get_map_size_inc() return map_size_inc_default; } -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, anon_hdl, id); @@ -250,7 +250,7 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = get_anon_hdl(); if (anon_hdl == -1) @@ -265,7 +265,7 @@ static intptr_t init_shared_map() return anon_hdl; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); size_t off = 
jl_atomic_fetch_add(&map_offset, size); @@ -292,7 +292,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) #ifdef _OS_LINUX_ // Using `/proc/self/mem`, A.K.A. Keno's remote memory manager. -ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) +ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) JL_NOTSAFEPOINT { static_assert(sizeof(off_t) >= 8, "off_t is smaller than 64bits"); #ifdef _P64 @@ -319,7 +319,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) // Do not call this directly. // Use `get_self_mem_fd` which has a guard to call this only once. -static int _init_self_mem() +static int _init_self_mem() JL_NOTSAFEPOINT { struct utsname kernel; uname(&kernel); @@ -359,13 +359,13 @@ static int _init_self_mem() return fd; } -static int get_self_mem_fd() +static int get_self_mem_fd() JL_NOTSAFEPOINT { static int fd = _init_self_mem(); return fd; } -static void write_self_mem(void *dest, void *ptr, size_t size) +static void write_self_mem(void *dest, void *ptr, size_t size) JL_NOTSAFEPOINT { while (size > 0) { ssize_t ret = pwrite_addr(get_self_mem_fd(), ptr, size, (uintptr_t)dest); @@ -424,7 +424,7 @@ struct Block { Block(const Block&) = delete; Block &operator=(const Block&) = delete; - Block(Block &&other) + Block(Block &&other) JL_NOTSAFEPOINT : ptr(other.ptr), total(other.total), avail(other.avail) @@ -433,9 +433,9 @@ struct Block { other.total = other.avail = 0; } - Block() = default; + Block() JL_NOTSAFEPOINT = default; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t aligned_avail = avail & (-align); if (aligned_avail < size) @@ -444,7 +444,7 @@ struct Block { avail = aligned_avail - size; return p; } - void reset(void *addr, size_t size) + void reset(void *addr, size_t size) JL_NOTSAFEPOINT { if (avail >= jl_page_size) { uintptr_t end = uintptr_t(ptr) + total; @@ -462,7 +462,8 @@ class RWAllocator { static 
constexpr int nblocks = 8; Block blocks[nblocks]{}; public: - void *alloc(size_t size, size_t align) + RWAllocator() JL_NOTSAFEPOINT = default; + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -498,9 +499,9 @@ struct SplitPtrBlock : public Block { uintptr_t wr_ptr{0}; uint32_t state{0}; - SplitPtrBlock() = default; + SplitPtrBlock() JL_NOTSAFEPOINT = default; - void swap(SplitPtrBlock &other) + void swap(SplitPtrBlock &other) JL_NOTSAFEPOINT { std::swap(ptr, other.ptr); std::swap(total, other.total); @@ -509,7 +510,7 @@ struct SplitPtrBlock : public Block { std::swap(state, other.state); } - SplitPtrBlock(SplitPtrBlock &&other) + SplitPtrBlock(SplitPtrBlock &&other) JL_NOTSAFEPOINT : SplitPtrBlock() { swap(other); @@ -534,11 +535,12 @@ class ROAllocator { // but might not have all the permissions set or data copied yet. SmallVector completed; virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) = 0; - virtual SplitPtrBlock alloc_block(size_t size) = 0; + size_t size, size_t align) JL_NOTSAFEPOINT = 0; + virtual SplitPtrBlock alloc_block(size_t size) JL_NOTSAFEPOINT = 0; public: - virtual ~ROAllocator() {} - virtual void finalize() + ROAllocator() JL_NOTSAFEPOINT = default; + virtual ~ROAllocator() JL_NOTSAFEPOINT {} + virtual void finalize() JL_NOTSAFEPOINT { for (auto &alloc: allocations) { // ensure the mapped pages are consistent @@ -552,7 +554,7 @@ class ROAllocator { } // Allocations that have not been finalized yet. 
SmallVector allocations; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -603,7 +605,7 @@ class ROAllocator { template class DualMapAllocator : public ROAllocator { protected: - void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override + void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT { assert((char*)rt_ptr >= block.ptr && (char*)rt_ptr < (block.ptr + block.total)); @@ -618,7 +620,7 @@ class DualMapAllocator : public ROAllocator { } return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr)); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; // use `wr_ptr` to record the id initially @@ -626,7 +628,7 @@ class DualMapAllocator : public ROAllocator { new_block.reset(ptr, size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { // This function handles setting the block to the right mode // and free'ing maps that are not needed anymore. 
@@ -662,11 +664,11 @@ class DualMapAllocator : public ROAllocator { } } public: - DualMapAllocator() + DualMapAllocator() JL_NOTSAFEPOINT { assert(anon_hdl != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -685,7 +687,7 @@ class SelfMemAllocator : public ROAllocator { SmallVector temp_buff; protected: void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) override + size_t size, size_t align) override JL_NOTSAFEPOINT { assert(!(block.state & SplitPtrBlock::InitAlloc)); for (auto &wr_block: temp_buff) { @@ -699,13 +701,13 @@ class SelfMemAllocator : public ROAllocator { new_block.reset(map_anon_page(block_size), block_size); return new_block.alloc(size, align); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; new_block.reset(map_anon_page(size), size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { if (!(block.state & SplitPtrBlock::Alloc)) return; @@ -718,13 +720,13 @@ class SelfMemAllocator : public ROAllocator { } } public: - SelfMemAllocator() + SelfMemAllocator() JL_NOTSAFEPOINT : ROAllocator(), temp_buff() { assert(get_self_mem_fd() != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -770,17 +772,15 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { RWAllocator rw_alloc; std::unique_ptr> ro_alloc; std::unique_ptr> exe_alloc; - bool code_allocated; size_t total_allocated; public: - RTDyldMemoryManagerJL() + RTDyldMemoryManagerJL() JL_NOTSAFEPOINT : SectionMemoryManager(), pending_eh(), rw_alloc(), ro_alloc(), exe_alloc(), - code_allocated(false), total_allocated(0) { #ifdef _OS_LINUX_ @@ -794,12 +794,12 @@ class RTDyldMemoryManagerJL 
: public SectionMemoryManager { exe_alloc.reset(new DualMapAllocator()); } } - ~RTDyldMemoryManagerJL() override + ~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT { } - size_t getTotalBytes() { return total_allocated; } + size_t getTotalBytes() JL_NOTSAFEPOINT { return total_allocated; } void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override; + size_t Size) override JL_NOTSAFEPOINT; #if 0 // Disable for now since we are not actually using this. void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, @@ -807,16 +807,16 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { #endif uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) override; + StringRef SectionName) override JL_NOTSAFEPOINT; uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) override; + bool isReadOnly) override JL_NOTSAFEPOINT; using SectionMemoryManager::notifyObjectLoaded; void notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) override; - bool finalizeMemory(std::string *ErrMsg = nullptr) override; + const object::ObjectFile &Obj) override JL_NOTSAFEPOINT; + bool finalizeMemory(std::string *ErrMsg = nullptr) override JL_NOTSAFEPOINT; template - void mapAddresses(DL &Dyld, Alloc &&allocator) + void mapAddresses(DL &Dyld, Alloc &&allocator) JL_NOTSAFEPOINT { for (auto &alloc: allocator->allocations) { if (alloc.rt_addr == alloc.wr_addr || alloc.relocated) @@ -826,7 +826,7 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { } } template - void mapAddresses(DL &Dyld) + void mapAddresses(DL &Dyld) JL_NOTSAFEPOINT { if (!ro_alloc) return; @@ -838,14 +838,9 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) + StringRef SectionName) JL_NOTSAFEPOINT { // allocating 
more than one code section can confuse libunwind. -#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_) - // TODO: Figure out why msan and now asan too need this. - assert(!code_allocated); - code_allocated = true; -#endif total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size); @@ -859,7 +854,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) + bool isReadOnly) JL_NOTSAFEPOINT { total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); @@ -873,7 +868,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, } void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) + const object::ObjectFile &Obj) JL_NOTSAFEPOINT { if (!ro_alloc) { assert(!exe_alloc); @@ -884,9 +879,8 @@ void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, mapAddresses(Dyld); } -bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) +bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) JL_NOTSAFEPOINT { - code_allocated = false; if (ro_alloc) { ro_alloc->finalize(); assert(exe_alloc); @@ -904,7 +898,7 @@ bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { if (uintptr_t(Addr) == LoadAddr) { register_eh_frames(Addr, Size); @@ -917,7 +911,7 @@ void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, #if 0 void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { deregister_eh_frames((uint8_t*)LoadAddr, Size); } @@ -925,12 +919,12 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, } -RTDyldMemoryManager* createRTDyldMemoryManager() +RTDyldMemoryManager* 
createRTDyldMemoryManager() JL_NOTSAFEPOINT { return new RTDyldMemoryManagerJL(); } -size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) +size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT { return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes(); } diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 4547e693755cd..a166b0a2c4800 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -4213,7 +4213,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg else { strct = UndefValue::get(lt); if (nargs < nf) - strct = ctx.builder.CreateFreeze(strct); + strct = ctx.builder.CreateFreeze(strct); // Change this to zero initialize instead? } } else if (tracked.second) { @@ -4380,25 +4380,18 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg ctx.builder.restoreIP(savedIP); } } - for (size_t i = nargs; i < nf; i++) { - if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { - ssize_t offs = jl_field_offset(sty, i); - ssize_t ptrsoffs = -1; - if (!inline_roots.empty()) - std::tie(offs, ptrsoffs) = split_value_field(sty, i); - assert(ptrsoffs < 0 && offs >= 0); - int fsz = jl_field_size(sty, i) - 1; - if (init_as_value) { + if (init_as_value) { + for (size_t i = nargs; i < nf; i++) { + if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { + ssize_t offs = jl_field_offset(sty, i); + ssize_t ptrsoffs = -1; + if (!inline_roots.empty()) + std::tie(offs, ptrsoffs) = split_value_field(sty, i); + assert(ptrsoffs < 0 && offs >= 0); + int fsz = jl_field_size(sty, i) - 1; unsigned llvm_idx = convert_struct_offset(ctx, cast(lt), offs + fsz); strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef(llvm_idx)); } - else { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); - Instruction *dest = cast(emit_ptrgep(ctx, strct, offs + fsz)); - if (promotion_point == nullptr) - 
promotion_point = dest; - ai.decorateInst(ctx.builder.CreateAlignedStore(ctx.builder.getInt8(0), dest, Align(1))); - } } } if (nargs < nf) { @@ -4407,9 +4400,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (promotion_point) ctx.builder.SetInsertPoint(promotion_point); if (strct) { - promotion_point = cast(ctx.builder.CreateFreeze(UndefValue::get(lt))); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); - ai.decorateInst(ctx.builder.CreateStore(promotion_point, strct)); + promotion_point = ai.decorateInst(ctx.builder.CreateMemSet(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), + jl_datatype_size(ty), MaybeAlign(jl_datatype_align(ty)))); } ctx.builder.restoreIP(savedIP); } diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index 31631eb70a4ad..830fe322a0a38 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -31,7 +31,7 @@ namespace { using namespace clang; using namespace ento; -#define PDP std::shared_ptr +typedef std::shared_ptr PDP; #define MakePDP make_unique static const Stmt *getStmtForDiagnostics(const ExplodedNode *N) @@ -394,13 +394,18 @@ PDP GCChecker::SafepointBugVisitor::VisitNode(const ExplodedNode *N, } else { PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( N->getLocationContext(), BRC.getSourceManager()); - return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + if (Pos.isValid()) + return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + //N->getLocation().dump(); } } else if (NewSafepointDisabled == (unsigned)-1) { PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( N->getLocationContext(), BRC.getSourceManager()); - return MakePDP(Pos, "Safepoints re-enabled here"); + if (Pos.isValid()) + return MakePDP(Pos, "Safepoints re-enabled here"); + //N->getLocation().dump(); } + // n.b. 
there may be no position here to report if they were disabled by julia_notsafepoint_enter/leave } return nullptr; } @@ -819,6 +824,7 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.ends_with_insensitive("jl_tupletype_t") || Name.ends_with_insensitive("jl_gc_tracked_buffer_t") || Name.ends_with_insensitive("jl_binding_t") || + Name.ends_with_insensitive("jl_binding_partition_t") || Name.ends_with_insensitive("jl_ordereddict_t") || Name.ends_with_insensitive("jl_tvar_t") || Name.ends_with_insensitive("jl_typemap_t") || @@ -842,6 +848,7 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.ends_with_insensitive("jl_stenv_t") || Name.ends_with_insensitive("jl_varbinding_t") || Name.ends_with_insensitive("set_world") || + Name.ends_with_insensitive("jl_ptr_kind_union_t") || Name.ends_with_insensitive("jl_codectx_t")) { return true; } diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 7ddb68fd6b036..98ac063ba36d6 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -110,22 +110,6 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr) JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { } -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraMPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraCGPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraFPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraLPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE - -#include "llvm-julia-passes.inc" - -#undef MODULE_PASS -#undef CGSCC_PASS -#undef FUNCTION_PASS -#undef LOOP_PASS - //LLVM C api to the julia JIT JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE diff --git a/src/codegen.cpp b/src/codegen.cpp index 3f69f4789493a..b0d5038024900 100644 --- 
a/src/codegen.cpp +++ b/src/codegen.cpp @@ -233,7 +233,6 @@ STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted"); STATISTIC(EmittedInvokes, "Number of invokes emitted"); STATISTIC(EmittedCalls, "Number of calls emitted"); STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted"); -STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted"); STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted"); STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted"); STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated"); @@ -1009,6 +1008,11 @@ static const auto jlinvoke_func = new JuliaFunction<>{ {AttributeSet(), Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); }, }; +static const auto jlopaque_closure_call_func = new JuliaFunction<>{ + XSTR(jl_f_opaque_closure_call), + get_func_sig, + get_func_attrs, +}; static const auto jlmethod_func = new JuliaFunction<>{ XSTR(jl_method_def), [](LLVMContext &C) { @@ -1606,7 +1610,7 @@ static const auto jltuple_func = new JuliaFunction<>{XSTR(jl_f_tuple), get_func_ static const auto jlintrinsic_func = new JuliaFunction<>{XSTR(jl_f_intrinsic_call), get_func3_sig, get_func_attrs}; static const auto &builtin_func_map() { - static std::map*> builtins = { + static auto builtins = new DenseMap*> { { jl_f_is_addr, new JuliaFunction<>{XSTR(jl_f_is), get_func_sig, get_func_attrs} }, { jl_f_typeof_addr, new JuliaFunction<>{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} }, { jl_f_sizeof_addr, new JuliaFunction<>{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} }, @@ -1649,18 +1653,18 @@ static const auto &builtin_func_map() { { jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }, { jl_f_current_scope_addr, new JuliaFunction<>{XSTR(jl_f_current_scope), get_func_sig, get_func_attrs} }, }; - return builtins; + return *builtins; } static const auto &may_dispatch_builtins() { - static 
std::unordered_set builtins( + static auto builtins = new DenseSet( {jl_f__apply_iterate_addr, jl_f__apply_pure_addr, jl_f__call_in_world_addr, jl_f__call_in_world_total_addr, jl_f__call_latest_addr, }); - return builtins; + return *builtins; } static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; @@ -2243,7 +2247,10 @@ static jl_array_t* build_stack_crumbs(jl_codectx_t &ctx) JL_NOTSAFEPOINT break; } if (caller) { - assert(ctx.emission_context.enqueuers.count(caller) == 1); + + // assert(ctx.emission_context.enqueuers.count(caller) == 1); + // Each enqueuer should only be enqueued at least once and only once. Check why this assert is triggering + // This isn't a fatal error, just means that we may get a wrong backtrace if (jl_is_method_instance(caller)) { //TODO: Use a subrange when C++20 is a thing for (auto it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) { @@ -2976,7 +2983,7 @@ static void jl_name_jlfuncparams_args(jl_codegen_params_t ¶ms, Function *F) F->getArg(3)->setName("sparams::Any"); } -static void jl_init_function(Function *F, const Triple &TT) +void jl_init_function(Function *F, const Triple &TT) { // set any attributes that *must* be set on all functions AttrBuilder attr(F->getContext()); @@ -3023,6 +3030,7 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, jl_value_t *rettype, if (jl_vararg_kind(jl_tparam(sig, jl_nparams(sig) - 1)) == JL_VARARG_UNBOUND) return false; // not invalid, consider if specialized signature is worthwhile + // n.b. 
sig is sometimes wrong for OC (tparam0 might be the captures type of the specialization, even though what gets passed in that slot is an OC object), so prefer_specsig is always set (instead of recomputing tparam0 using get_oc_type) if (prefer_specsig) return true; if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type) @@ -5236,7 +5244,15 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i]); + Value *arg; + if (i == 0 && trampoline == julia_call3) { + const jl_cgval_t &f = argv[i]; + arg = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; + arg = decay_derived(ctx, arg); + } + else { + arg = boxed(ctx, argv[i]); + } theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -5283,13 +5299,13 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; } for (size_t i = 0; i < nargs; i++) { - jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to be a datatype by construction for specsig if (is_opaque_closure && i == 0) { // Special implementation for opaque closures: their jt and thus - // julia_type_to_llvm values are likely wrong, so override the - // behavior here to directly pass the expected pointer based instead - // just on passing arg as a pointer + // julia_type_to_llvm values are likely wrong (based on captures instead of the OC), so override the + // behavior here to directly pass the expected pointer directly instead of + // computing it from the available information + // jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(specTypes, jlretty) jl_cgval_t arg = argv[i]; if (arg.isghost) { argvals[idx] = Constant::getNullValue(ctx.builder.getPtrTy(AddressSpace::Derived)); @@ -5302,6 
+5318,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; continue; } + jl_value_t *jt = jl_nth_slot_type(specTypes, i); jl_cgval_t arg = update_julia_type(ctx, argv[i], jt); if (arg.typ == jl_bottom_type) return jl_cgval_t(); @@ -5519,6 +5536,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR // Check if we already queued this up auto it = ctx.call_targets.find(codeinst); if (need_to_emit && it != ctx.call_targets.end()) { + assert(it->second.specsig == specsig); protoname = it->second.decl->getName(); need_to_emit = cache_valid = false; } @@ -5559,7 +5577,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR handled = true; if (need_to_emit) { Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); - ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, specsig}; + ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, nullptr, specsig}; if (trim_may_error(ctx.params->trim)) push_frames(ctx, ctx.linfo, mi); } @@ -5570,9 +5588,9 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR if (!handled) { if (trim_may_error(ctx.params->trim)) { if (lival.constant) { - arraylist_push(&new_invokes, lival.constant); push_frames(ctx, ctx.linfo, (jl_method_instance_t*)lival.constant); - } else { + } + else { errs() << "Dynamic call to unknown function"; errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; @@ -5717,10 +5735,34 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo // special case for some known builtin not handled by emit_builtin_call auto it = builtin_func_map().find(builtin_fptr); if (it != builtin_func_map().end()) { - if (trim_may_error(ctx.params->trim) && may_dispatch_builtins().count(builtin_fptr)) { - errs() << "ERROR: Dynamic call to builtin" << 
jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name); - errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; - print_stacktrace(ctx, ctx.params->trim); + if (trim_may_error(ctx.params->trim)) { + bool may_dispatch = may_dispatch_builtins().count(builtin_fptr); + if (may_dispatch && f.constant == jl_builtin__apply_iterate && nargs >= 4) { + if (jl_subtype(argv[2].typ, (jl_value_t*)jl_builtin_type)) { + static jl_value_t *jl_dispatchfree_apply_iterate_type = NULL; + if (!jl_dispatchfree_apply_iterate_type) { + jl_value_t *types[5] = { + (jl_value_t *)jl_simplevector_type, + (jl_value_t *)jl_genericmemory_type, + (jl_value_t *)jl_array_type, + (jl_value_t *)jl_tuple_type, + (jl_value_t *)jl_namedtuple_type, + }; + jl_dispatchfree_apply_iterate_type = jl_as_global_root(jl_type_union(types, 5), 1); + } + for (size_t i = 3; i < nargs; i++) { + auto ai = argv[i].typ; + if (!jl_subtype(ai, jl_dispatchfree_apply_iterate_type)) + break; + } + may_dispatch = false; + } + } + if (may_dispatch) { + errs() << "ERROR: Dynamic call to builtin " << jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } } Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef(argv).drop_front(), nargs - 1, julia_call); setName(ctx.emission_context, ret, it->second->name + "_ret"); @@ -5728,20 +5770,21 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo } } FunctionCallee fptr; - Value *F; JuliaFunction<> *cc; if (f.typ == (jl_value_t*)jl_intrinsic_type) { fptr = prepare_call(jlintrinsic_func); - F = f.inline_roots.empty() && f.ispointer() ? 
data_pointer(ctx, f) : value_to_pointer(ctx, f).V; - F = decay_derived(ctx, F); cc = julia_call3; } else { fptr = FunctionCallee(get_func_sig(ctx.builder.getContext()), ctx.builder.CreateCall(prepare_call(jlgetbuiltinfptr_func), {emit_typeof(ctx, f)})); - F = boxed(ctx, f); cc = julia_call; } - Value *ret = emit_jlcall(ctx, fptr, F, ArrayRef(argv).drop_front(), nargs - 1, cc); + if (trim_may_error(ctx.params->trim)) { + errs() << "ERROR: Dynamic call to unknown builtin"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } + Value *ret = emit_jlcall(ctx, fptr, nullptr, argv, nargs, cc); setName(ctx.emission_context, ret, "Builtin_ret"); return mark_julia_type(ctx, ret, true, rt); } @@ -5758,52 +5801,13 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo JL_GC_POP(); return r; } + // TODO: else emit_oc_call } } int failed_dispatch = !argv[0].constant; if (ctx.params->trim != JL_TRIM_NO) { - size_t min_valid = 1; - size_t max_valid = ~(size_t)0; - size_t latest_world = jl_get_world_counter(); // TODO: marshal the world age of the compilation here. 
- - // Find all methods matching the call signature - jl_array_t *matches = NULL; - jl_value_t *tup = NULL; - JL_GC_PUSH2(&tup, &matches); - if (!failed_dispatch) { - SmallVector argtypes; - for (auto& arg: argv) - argtypes.push_back(arg.typ); - tup = jl_apply_tuple_type_v(argtypes.data(), argtypes.size()); - matches = (jl_array_t*)jl_matching_methods((jl_tupletype_t*)tup, jl_nothing, 10 /*TODO: make global*/, 1, - latest_world, &min_valid, &max_valid, NULL); - if ((jl_value_t*)matches == jl_nothing) - failed_dispatch = 1; - } - - // Expand each matching method to its unique specialization, if it has exactly one - if (!failed_dispatch) { - size_t k; - size_t len = new_invokes.len; - for (k = 0; k < jl_array_nrows(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_method_instance_t *mi = jl_method_match_to_mi(match, latest_world, min_valid, max_valid, 0); - if (!mi) { - if (jl_array_nrows(matches) == 1) { - // if the method match is not compileable, but there is only one, fall back to - // unspecialized implementation - mi = jl_get_unspecialized(match->method); - } - else { - new_invokes.len = len; - failed_dispatch = 1; - break; - } - } - arraylist_push(&new_invokes, mi); - } - } - JL_GC_POP(); + // TODO: Implement the last-minute call resolution that used to be here + // in inference instead. 
} if (failed_dispatch && trim_may_error(ctx.params->trim)) { @@ -6634,66 +6638,73 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met assert(jl_is_method_instance(mi)); ci = jl_atomic_load_relaxed(&mi->cache); } - - if (ci == NULL || (jl_value_t*)ci == jl_nothing) { - JL_GC_POP(); - return std::make_pair((Function*)NULL, (Function*)NULL); - } - auto inferred = jl_atomic_load_relaxed(&ci->inferred); - if (!inferred || inferred == jl_nothing) { + if (ci == NULL || (jl_value_t*)ci == jl_nothing || ci->rettype != rettype || !jl_egal(sigtype, mi->specTypes)) { // TODO: correctly handle the ABI conversion if rettype != ci->rettype JL_GC_POP(); return std::make_pair((Function*)NULL, (Function*)NULL); } - auto it = ctx.emission_context.compiled_functions.find(ci); - - if (it == ctx.emission_context.compiled_functions.end()) { - ++EmittedOpaqueClosureFunctions; - jl_code_info_t *ir = jl_uncompress_ir(closure_method, ci, (jl_value_t*)inferred); - JL_GC_PUSH1(&ir); - // TODO: Emit this inline and outline it late using LLVM's coroutine support. 
- orc::ThreadSafeModule closure_m = jl_create_ts_module( - name_from_method_instance(mi), ctx.emission_context.tsctx, - jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple())); - jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context); - JL_GC_POP(); - it = ctx.emission_context.compiled_functions.insert(std::make_pair(ci, std::make_pair(std::move(closure_m), std::move(closure_decls)))).first; + // method lookup code (similar to emit_invoke, and the inverse of emit_specsig_oc_call) + bool specsig = uses_specsig(sigtype, false, rettype, true); + std::string name; + std::string oc; + StringRef protoname; + StringRef proto_oc; + + // Check if we already queued this up + auto it = ctx.call_targets.find(ci); + bool need_to_emit = it == ctx.call_targets.end(); + if (!need_to_emit) { + assert(specsig == it->second.specsig); + if (specsig) { + protoname = it->second.decl->getName(); + proto_oc = it->second.oc->getName(); + } + else { + proto_oc = it->second.decl->getName(); + } + need_to_emit = false; + } + else { + if (specsig) { + raw_string_ostream(name) << "j_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + protoname = StringRef(name); + } + raw_string_ostream(oc) << "j1_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + proto_oc = StringRef(oc); } - auto &closure_m = it->second.first; - auto &closure_decls = it->second.second; - - assert(closure_decls.functionObject != "jl_fptr_sparam"); - bool isspecsig = closure_decls.functionObject != "jl_fptr_args"; - - Function *F = NULL; - std::string fname = isspecsig ? 
- closure_decls.functionObject : - closure_decls.specFunctionObject; - if (GlobalValue *V = jl_Module->getNamedValue(fname)) { + // Get the fptr1 OC + Function *F = nullptr; + if (GlobalValue *V = jl_Module->getNamedValue(proto_oc)) { F = cast(V); } else { F = Function::Create(get_func_sig(ctx.builder.getContext()), Function::ExternalLinkage, - fname, jl_Module); + proto_oc, jl_Module); jl_init_function(F, ctx.emission_context.TargetTriple); jl_name_jlfunc_args(ctx.emission_context, F); F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()})); } - Function *specF = NULL; - if (!isspecsig) { - specF = F; + + // Get the specsig (if applicable) + Function *specF = nullptr; + jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; + unsigned return_roots = 0; + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + assert(is_opaque_closure); + if (specsig) { + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, nullptr, protoname, mi->specTypes, rettype, is_opaque_closure, gcstack_arg); + cc = returninfo.cc; + return_roots = returninfo.return_roots; + specF = cast(returninfo.decl.getCallee()); } - else { - //emission context holds context lock so can get module - specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); - if (specF) { - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, - closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); - specF = cast(returninfo.decl.getCallee()); - } + + if (need_to_emit) { + ctx.call_targets[ci] = {cc, return_roots, specsig ? specF : F, specsig ? 
F : nullptr, specsig}; } + JL_GC_POP(); return std::make_pair(F, specF); } @@ -7173,7 +7184,12 @@ static Value *get_scope_field(jl_codectx_t &ctx) return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "current_scope"); } -static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) +Function *get_or_emit_fptr1(StringRef preal_decl, Module *M) +{ + return cast(M->getOrInsertFunction(preal_decl, get_func_sig(M->getContext()), get_func_attrs(M->getContext())).getCallee()); +} + +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT { ++EmittedToJLInvokes; jl_codectx_t ctx(M->getContext(), params, codeinst); @@ -7184,7 +7200,6 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr name, M); jl_init_function(f, params.TargetTriple); if (trim_may_error(params.params->trim)) { - arraylist_push(&new_invokes, codeinst->def); // Try t compile this invoke // TODO: Debuginfo! push_frames(ctx, ctx.linfo, codeinst->def, 1); } @@ -7213,7 +7228,17 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr return f; } -static void emit_cfunc_invalidate( +static jl_value_t *get_oc_type(jl_value_t *calltype, jl_value_t *rettype) JL_ALWAYS_LEAFTYPE +{ + jl_value_t *argtype = jl_argtype_without_function((jl_value_t*)calltype); + JL_GC_PUSH1(&argtype); + jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, argtype, rettype); + JL_GC_PROMISE_ROOTED(oc_type); + JL_GC_POP(); + return oc_type; +} + +void emit_specsig_to_fptr1( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, @@ -7240,14 +7265,18 @@ static void emit_cfunc_invalidate( ++AI; // gcstack_arg } for (size_t i = 0; i < nargs; i++) { - // n.b. 
calltype is required to be a datatype by construction for specsig - jl_value_t *jt = jl_nth_slot_type(calltype, i); if (i == 0 && is_for_opaque_closure) { + // `jt` would be wrong here (it is the captures type), so is not used used for + // the ABI decisions, but the argument actually will require boxing as its real type + // which can be exactly recomputed from the specialization, as that defined the ABI + jl_value_t *oc_type = get_oc_type(calltype, rettype); Value *arg_v = &*AI; ++AI; - myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); + myargs[i] = mark_julia_slot(arg_v, (jl_value_t*)oc_type, NULL, ctx.tbaa().tbaa_const); continue; } + // n.b. calltype is required to be a datatype by construction for specsig + jl_value_t *jt = jl_nth_slot_type(calltype, i); bool isboxed = false; Type *et; if (deserves_argbox(jt)) { @@ -7335,16 +7364,6 @@ static void emit_cfunc_invalidate( } } -static void emit_cfunc_invalidate( - Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, - jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, - size_t nargs, jl_codegen_params_t ¶ms, - size_t min_world, size_t max_world) -{ - emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, - prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func), min_world, max_world); -} - static Function* gen_cfun_wrapper( Module *into, jl_codegen_params_t ¶ms, const function_sig_t &sig, jl_value_t *ff, const char *aliasname, @@ -7712,11 +7731,11 @@ static Function* gen_cfun_wrapper( GlobalVariable::InternalLinkage, funcName, M); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.attrs, gf_thunk->getAttributes()})); - // build a specsig -> jl_apply_generic converter thunk + // build a specsig -> jl_apply_generic converter thunk // this builds a method that calls jl_apply_generic (as a closure over a singleton 
function pointer), // but which has the signature of a specsig - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context, - min_world, max_world); + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context, + prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func), min_world, max_world); returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), ctx.builder.CreateSelect(age_ok, returninfo.decl.getCallee(), gf_thunk)); } retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, codeinst->rettype, returninfo, nullptr, inputargs, nargs + 1); @@ -8026,7 +8045,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi } // generate a julia-callable function that calls f (AKA lam) -static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, StringRef funcName, +// if is_opaque_closure, then generate the OC invoke, rather than a real invoke +static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName, Module *M, jl_codegen_params_t ¶ms) { ++GeneratedInvokeWrappers; @@ -8055,11 +8075,14 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, j allocate_gc_frame(ctx, b0); SmallVector argv(nargs); - bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); for (size_t i = 0; i < nargs; ++i) { - jl_value_t *ty = ((i == 0) && is_opaque_closure) ? 
(jl_value_t*)jl_any_type : - jl_nth_slot_type(lam->specTypes, i); + if (i == 0 && is_opaque_closure) { + jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(lam->specTypes, jlretty) + argv[i] = mark_julia_slot(funcArg, oc_type, NULL, ctx.tbaa().tbaa_const); + continue; + } + jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i); Value *theArg; if (i == 0) { theArg = funcArg; @@ -8455,6 +8478,7 @@ static jl_llvm_functions_t // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise // OpaqueClosure implicitly loads the env if (i == 0 && ctx.is_opaque_closure) { + // n.b. this is not really needed, because ty was already supposed to be correct if (jl_is_array(src->slottypes)) { ty = jl_array_ptr_ref((jl_array_t*)src->slottypes, i); } @@ -8554,7 +8578,7 @@ static jl_llvm_functions_t raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); declarations.functionObject = wrapName; size_t nparams = jl_nparams(lam->specTypes); - gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, declarations.functionObject, M, ctx.emission_context); + gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, ctx.is_opaque_closure, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) // TODO: add attributes: dereferenceable // TODO: (if needsparams) add attributes: dereferenceable, readonly, nocapture @@ -8564,11 +8588,10 @@ static jl_llvm_functions_t GlobalVariable::ExternalLinkage, declarations.specFunctionObject, M); jl_init_function(f, ctx.emission_context.TargetTriple); - if (needsparams) { + if (needsparams) jl_name_jlfuncparams_args(ctx.emission_context, f); - } else { + else jl_name_jlfunc_args(ctx.emission_context, f); - } f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); 
returninfo.decl = f; declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args"; @@ -8940,76 +8963,73 @@ static jl_llvm_functions_t } for (i = 0; i < nreq && i < vinfoslen; i++) { jl_sym_t *s = slot_symbol(ctx, i); - jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); - // TODO: jl_nth_slot_type should call jl_rewrap_unionall? - // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise - bool isboxed = deserves_argbox(argType); - Type *llvmArgType = NULL; - if (i == 0 && ctx.is_opaque_closure) { - isboxed = false; - llvmArgType = ctx.builder.getPtrTy(AddressSpace::Derived); - argType = (jl_value_t*)jl_any_type; - } - else { - llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); - } jl_varinfo_t &vi = ctx.slots[i]; - if (s == jl_unused_sym || vi.value.constant) { - assert(vi.boxroot == NULL); - if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { - ++AI; - auto tracked = CountTrackedPointers(llvmArgType); - if (tracked.count && !tracked.all) - ++AI; - } - continue; - } jl_cgval_t theArg; - // If this is an opaque closure, implicitly load the env and switch - // the world age. if (i == 0 && ctx.is_opaque_closure) { + // If this is an opaque closure, implicitly load the env and switch + // the world age. The specTypes value is wrong for this field, so + // this needs to be handled first. 
+ // jl_value_t *oc_type = get_oc_type(calltype, rettype); + Value *oc_this = decay_derived(ctx, &*AI); + ++AI; // both specsig (derived) and fptr1 (box) pass this argument as a distinct argument // Load closure world - Value *oc_this = decay_derived(ctx, &*AI++); - Value *argaddr = oc_this; - Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world)); - + Value *worldaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, world)); jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr); - // Load closure env - Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures)); + if (s == jl_unused_sym || vi.value.constant) + continue; - jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, - nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); - theArg = update_julia_type(ctx, closure_env, vi.value.typ); - } - else if (specsig) { - theArg = get_specsig_arg(argType, llvmArgType, isboxed); + // Load closure env, which is always a boxed value (usually some Tuple) currently + Value *envaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, captures)); + theArg = typed_load(ctx, envaddr, NULL, (jl_value_t*)vi.value.typ, + nullptr, nullptr, /*isboxed*/true, AtomicOrdering::NotAtomic, false, sizeof(void*)); } else { - if (i == 0) { - // first (function) arg is separate in jlcall - theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); + jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + // TODO: jl_nth_slot_type should call jl_rewrap_unionall? 
+ // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise + bool isboxed = deserves_argbox(argType); + Type *llvmArgType = NULL; + llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); + if (s == jl_unused_sym || vi.value.constant) { + assert(vi.boxroot == NULL); + if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { + ++AI; + auto tracked = CountTrackedPointers(llvmArgType); + if (tracked.count && !tracked.all) + ++AI; + } + continue; + } + if (specsig) { + theArg = get_specsig_arg(argType, llvmArgType, isboxed); } else { - Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( - ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), - false, vi.value.typ)); - theArg = mark_julia_type(ctx, load, true, vi.value.typ); - if (debug_enabled && vi.dinfo && !vi.boxroot) { - SmallVector addr; - addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus_uconst); - addr.push_back((i - 1) * sizeof(void*)); - if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) + if (i == 0) { + // first (function) arg is separate in jlcall + theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); + } + else { + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( + ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), + false, vi.value.typ)); + theArg = mark_julia_type(ctx, load, true, vi.value.typ); + if (debug_enabled && vi.dinfo && !vi.boxroot) { + SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); - dbuilder.insertDeclare(pargArray, vi.dinfo, 
dbuilder.createExpression(addr), - topdebugloc, - ctx.builder.GetInsertBlock()); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); + addr.push_back((i - 1) * sizeof(void*)); + if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) + addr.push_back(llvm::dwarf::DW_OP_deref); + dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr), + topdebugloc, + ctx.builder.GetInsertBlock()); + } } } } @@ -9996,7 +10016,6 @@ jl_llvm_functions_t jl_emit_code( { JL_TIMING(CODEGEN, CODEGEN_LLVM); jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); - // caller must hold codegen_lock jl_llvm_functions_t decls = {}; assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || compare_cgparams(params.params, &jl_default_cgparams)) && @@ -10031,33 +10050,38 @@ jl_llvm_functions_t jl_emit_code( return decls; } +static int effects_foldable(uint32_t effects) +{ + // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) + return ((effects & 0x7) == 0) && // is_consistent(effects) + (((effects >> 10) & 0x03) == 0) && // is_noub(effects) + (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) + ((effects >> 6) & 0x01); // is_terminates(effects) +} + static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t ¶ms, jl_method_instance_t *mi, jl_value_t *rettype) { - Module *M = m.getModuleUnlocked(); - jl_codectx_t ctx(M->getContext(), params, 0, 0); - ctx.name = M->getModuleIdentifier().data(); - std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_llvm_functions_t declarations; declarations.functionObject = "jl_f_opaque_closure_call"; if (uses_specsig(mi->specTypes, false, rettype, true)) { + // context lock is held by params + Module *M = m.getModuleUnlocked(); + jl_codectx_t ctx(M->getContext(), params, 0, 0); + ctx.name = M->getModuleIdentifier().data(); + std::string funcName = 
get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); Function *gf_thunk = cast(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context, ctx.min_world, ctx.max_world); + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, + mi->specTypes, rettype, true, nrealargs, ctx.emission_context, + prepare_call_in(gf_thunk->getParent(), jlopaque_closure_call_func), // TODO: this could call emit_oc_call directly + ctx.min_world, ctx.max_world); declarations.specFunctionObject = funcName; } return declarations; } -static int effects_foldable(uint32_t effects) -{ - // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) - return ((effects & 0x7) == 0) && // is_consistent(effects) - (((effects >> 10) & 0x03) == 0) && // is_noub(effects) - (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) - ((effects >> 6) & 0x01); // is_terminates(effects) -} jl_llvm_functions_t jl_emit_codeinst( orc::ThreadSafeModule &m, @@ -10070,12 +10094,14 @@ jl_llvm_functions_t jl_emit_codeinst( JL_GC_PUSH1(&src); if (!src) { src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - jl_method_t *def = codeinst->def->def.method; + jl_method_instance_t *mi = codeinst->def; + jl_method_t *def = mi->def.method; // Check if this is the generic method for opaque closure wrappers - - // if so, generate the specsig -> invoke converter. 
+ // if so, this must compile specptr such that it holds the specptr -> invoke wrapper + // to satisfy the dispatching implementation requirements of jl_f_opaque_closure_call if (def == jl_opaque_closure_method) { JL_GC_POP(); - return jl_emit_oc_wrapper(m, params, codeinst->def, codeinst->rettype); + return jl_emit_oc_wrapper(m, params, mi, codeinst->rettype); } if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def)) src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src); @@ -10149,135 +10175,15 @@ jl_llvm_functions_t jl_emit_codeinst( return decls; } - -void jl_compile_workqueue( - jl_codegen_params_t ¶ms, - CompilationPolicy policy) -{ - JL_TIMING(CODEGEN, CODEGEN_Workqueue); - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - while (!params.workqueue.empty()) { - jl_code_instance_t *codeinst; - auto it = params.workqueue.back(); - codeinst = it.first; - auto proto = it.second; - params.workqueue.pop_back(); - // try to emit code for this item from the workqueue - StringRef preal_decl = ""; - bool preal_specsig = false; - jl_callptr_t invoke = NULL; - if (params.cache) { - // WARNING: this correctness is protected by an outer lock - uint8_t specsigflags; - void *fptr; - jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); - //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) - if (invoke == jl_fptr_args_addr) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - } - else if (specsigflags & 0b1) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - preal_specsig = true; - } - } - if (preal_decl.empty()) { - auto it = params.compiled_functions.find(codeinst); - if (it == params.compiled_functions.end()) { - // Reinfer the function. The JIT came along and removed the inferred - // method body. 
See #34993 - if ((policy != CompilationPolicy::Default || params.params->trim) && - jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { - // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary) - codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE); - } - if (codeinst) { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), - params.tsctx, params.DL, params.TargetTriple); - auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params); - if (result_m) - it = params.compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; - } - } - if (it != params.compiled_functions.end()) { - auto &decls = it->second.second; - if (decls.functionObject == "jl_fptr_args") { - preal_decl = decls.specFunctionObject; - } - else if (decls.functionObject != "jl_fptr_sparam") { - preal_decl = decls.specFunctionObject; - preal_specsig = true; - } - } - } - // patch up the prototype we emitted earlier - Module *mod = proto.decl->getParent(); - assert(proto.decl->isDeclaration()); - if (proto.specsig) { - // expected specsig - if (!preal_specsig) { - if (params.params->trim) { - auto it = params.compiled_functions.find(codeinst); //TODO: What to do about this - errs() << "Bailed out to invoke when compiling:"; - jl_(codeinst->def); - if (it != params.compiled_functions.end()) { - errs() << it->second.second.functionObject << "\n"; - errs() << it->second.second.specFunctionObject << "\n"; - } else - errs() << "codeinst not in compile_functions\n"; - } - // emit specsig-to-(jl)invoke conversion - StringRef invokeName; - if (invoke != NULL) - invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - Function *preal = emit_tojlinvoke(codeinst, invokeName, mod, params); - proto.decl->setLinkage(GlobalVariable::InternalLinkage); - //protodecl->setAlwaysInline(); - 
jl_init_function(proto.decl, params.TargetTriple); - size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed - // TODO: maybe this can be cached in codeinst->specfptr? - emit_cfunc_invalidate(proto.decl, proto.cc, proto.return_roots, codeinst->def->specTypes, codeinst->rettype, false, nrealargs, params, preal, 0, 0); - preal_decl = ""; // no need to fixup the name - } - else { - assert(!preal_decl.empty()); - } - } - else { - // expected non-specsig - if (preal_decl.empty() || preal_specsig) { - // emit jlcall1-to-(jl)invoke conversion - StringRef invokeName; - if (invoke != NULL) - invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - preal_decl = emit_tojlinvoke(codeinst, invokeName, mod, params)->getName(); - } - } - if (!preal_decl.empty()) { - // merge and/or rename this prototype to the real function - if (Value *specfun = mod->getNamedValue(preal_decl)) { - if (proto.decl != specfun) - proto.decl->replaceAllUsesWith(specfun); - } - else { - proto.decl->setName(preal_decl); - } - } - } - JL_GC_POP(); -} - - // --- initialization --- -SmallVector, 0> gv_for_global; +static auto gv_for_global = new SmallVector, 0>(); static void global_jlvalue_to_llvm(JuliaVariable *var, jl_value_t **addr) { - gv_for_global.push_back(std::make_pair(addr, var)); + gv_for_global->push_back(std::make_pair(addr, var)); } static JuliaVariable *julia_const_gv(jl_value_t *val) { - for (auto &kv : gv_for_global) { + for (auto &kv : *gv_for_global) { if (*kv.first == val) return kv.second; } @@ -10286,6 +10192,9 @@ static JuliaVariable *julia_const_gv(jl_value_t *val) static void init_jit_functions(void) { + add_named_global("jl_fptr_args", jl_fptr_args_addr); + add_named_global("jl_fptr_sparam", jl_fptr_sparam_addr); + add_named_global("jl_f_opaque_closure_call", &jl_f_opaque_closure_call); add_named_global(jl_small_typeof_var, &jl_small_typeof); add_named_global(jlstack_chk_guard_var, 
&__stack_chk_guard); add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle); diff --git a/src/debug-registry.h b/src/debug-registry.h index 4c9e13d8cd72d..4d0b7a44f19e5 100644 --- a/src/debug-registry.h +++ b/src/debug-registry.h @@ -32,7 +32,7 @@ class JITDebugInfoRegistry std::unique_lock lock; CResourceT &resource; - Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT : lock(mutex), resource(resource) {} + Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lock(mutex), resource(resource) {} Lock(Lock &&) JL_NOTSAFEPOINT = default; Lock &operator=(Lock &&) JL_NOTSAFEPOINT = default; @@ -56,7 +56,7 @@ class JITDebugInfoRegistry return resource; } - ~Lock() JL_NOTSAFEPOINT = default; + ~Lock() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; private: @@ -68,15 +68,15 @@ class JITDebugInfoRegistry Locked(ResourceT resource = ResourceT()) JL_NOTSAFEPOINT : mutex(), resource(std::move(resource)) {} - LockT operator*() JL_NOTSAFEPOINT { + LockT operator*() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return LockT(mutex, resource); } - ConstLockT operator*() const JL_NOTSAFEPOINT { + ConstLockT operator*() const JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return ConstLockT(mutex, resource); } - ~Locked() JL_NOTSAFEPOINT = default; + ~Locked() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; struct image_info_t { @@ -105,6 +105,7 @@ class JITDebugInfoRegistry std::unique_ptr object; std::unique_ptr context; LazyObjectInfo() = delete; + ~LazyObjectInfo() JL_NOTSAFEPOINT = default; }; struct SectionInfo { @@ -113,6 +114,7 @@ class JITDebugInfoRegistry ptrdiff_t slide; uint64_t SectionIndex; SectionInfo() = delete; + ~SectionInfo() JL_NOTSAFEPOINT = default; }; template @@ -145,7 +147,7 @@ class JITDebugInfoRegistry void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT; jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT; void 
registerJITObject(const llvm::object::ObjectFile &Object, - std::function getLoadAddress); + std::function getLoadAddress) JL_NOTSAFEPOINT; objectmap_t& getObjectMap() JL_NOTSAFEPOINT; void add_image_info(image_info_t info) JL_NOTSAFEPOINT; bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index f6fca47e9a889..31f1ba8281a89 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -58,7 +58,7 @@ extern "C" void __register_frame(void*) JL_NOTSAFEPOINT; extern "C" void __deregister_frame(void*) JL_NOTSAFEPOINT; template -static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) +static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) JL_NOTSAFEPOINT { const char *P = EHFrameAddr; const char *End = P + EHFrameSize; @@ -164,6 +164,12 @@ static void jl_profile_atomic(T f) JL_NOTSAFEPOINT // --- storing and accessing source location metadata --- void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) { + // Non-opaque-closure MethodInstances are considered globally rooted + // through their methods, but for OC, we need to create a global root + // here. + jl_method_instance_t *mi = codeinst->def; + if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) + jl_as_global_root((jl_value_t*)mi, 1); getJITDebugRegistry().add_code_in_flight(name, codeinst, DL); } @@ -369,11 +375,6 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, if (codeinst) { JL_GC_PROMISE_ROOTED(codeinst); mi = codeinst->def; - // Non-opaque-closure MethodInstances are considered globally rooted - // through their methods, but for OC, we need to create a global root - // here. 
- if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) - mi = (jl_method_instance_t*)jl_as_global_root((jl_value_t*)mi, 1); } jl_profile_atomic([&]() JL_NOTSAFEPOINT { if (mi) @@ -1281,14 +1282,14 @@ void register_eh_frames(uint8_t *Addr, size_t Size) { // On OS X OS X __register_frame takes a single FDE as an argument. // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_register_frame(Entry); }); } void deregister_eh_frames(uint8_t *Addr, size_t Size) { - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_deregister_frame(Entry); }); } @@ -1300,7 +1301,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) // Skip over an arbitrary long LEB128 encoding. // Return the pointer to the first unprocessed byte. -static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) +static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { const uint8_t *P = Addr; while ((*P >> 7) != 0 && P < End) @@ -1312,7 +1313,7 @@ static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) // bytes than what there are more bytes than what the type can store. // Adjust the pointer to the first unprocessed byte. 
template static T parse_leb128(const uint8_t *&Addr, - const uint8_t *End) + const uint8_t *End) JL_NOTSAFEPOINT { typedef typename std::make_unsigned::type uT; uT v = 0; @@ -1335,7 +1336,7 @@ template static T parse_leb128(const uint8_t *&Addr, } template -static U safe_trunc(T t) +static U safe_trunc(T t) JL_NOTSAFEPOINT { assert((t >= static_cast(std::numeric_limits::min())) && (t <= static_cast(std::numeric_limits::max()))); @@ -1375,7 +1376,7 @@ enum DW_EH_PE : uint8_t { }; // Parse the CIE and return the type of encoding used by FDE -static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) +static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { // https://www.airs.com/blog/archives/460 // Length (4 bytes) @@ -1481,7 +1482,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // Now first count the number of FDEs size_t nentries = 0; - processFDEs((char*)Addr, Size, [&](const char*){ nentries++; }); + processFDEs((char*)Addr, Size, [&](const char*) JL_NOTSAFEPOINT { nentries++; }); if (nentries == 0) return; @@ -1510,7 +1511,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // CIE's (may not happen) without parsing it every time. 
const uint8_t *cur_cie = nullptr; DW_EH_PE encoding = DW_EH_PE_omit; - processFDEs((char*)Addr, Size, [&](const char *Entry) { + processFDEs((char*)Addr, Size, [&](const char *Entry) JL_NOTSAFEPOINT { // Skip Length (4bytes) and CIE offset (4bytes) uint32_t fde_size = *(const uint32_t*)Entry; uint32_t cie_id = ((const uint32_t*)Entry)[1]; @@ -1631,7 +1632,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) #endif extern "C" JL_DLLEXPORT_CODEGEN -uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) +uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) JL_NOTSAFEPOINT { // Might be called from unmanaged thread jl_lock_profile(); diff --git a/src/engine.cpp b/src/engine.cpp index 6db4dce44e48e..2b68de731c4dd 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -45,8 +45,8 @@ template<> struct llvm::DenseMapInfo { } }; -static std::mutex engine_lock; -static std::condition_variable engine_wait; +static std::mutex engine_lock; // n.b. this lock is only ever held briefly +static std::condition_variable engine_wait; // but it may be waiting a while in this state // map from MethodInstance to threadid that owns it currently for inference static DenseMap Reservations; // vector of which threads are blocked and which lease they need @@ -63,55 +63,51 @@ jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph jl_code_instance_t *ci = jl_new_codeinst_uninit(m, owner); // allocate a placeholder JL_GC_PUSH1(&ci); - int8_t gc_state = jl_gc_safe_enter(ct->ptls); - InferKey key = {m, owner}; - std::unique_lock lock(engine_lock); auto tid = jl_atomic_load_relaxed(&ct->tid); - if ((signed)Awaiting.size() < tid + 1) - Awaiting.resize(tid + 1); - while (1) { - auto record = Reservations.find(key); - if (record == Reservations.end()) { - Reservations[key] = ReservationInfo{tid, ci}; - lock.unlock(); - jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint - 
JL_GC_POP(); - return ci; - } - // before waiting, need to run deadlock/cycle detection - // there is a cycle if the thread holding our lease is blocked - // and waiting for (transitively) any lease that is held by this thread - auto wait_tid = record->second.tid; - while (1) { - if (wait_tid == tid) { - lock.unlock(); - jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint - JL_GC_POP(); - ct->ptls->engine_nqueued--; - return ci; // break the cycle + if (([tid, m, owner, ci] () -> bool { // necessary scope block / lambda for unique_lock + jl_unique_gcsafe_lock lock(engine_lock); + InferKey key{m, owner}; + if ((signed)Awaiting.size() < tid + 1) + Awaiting.resize(tid + 1); + while (1) { + auto record = Reservations.find(key); + if (record == Reservations.end()) { + Reservations[key] = ReservationInfo{tid, ci}; + return false; + } + // before waiting, need to run deadlock/cycle detection + // there is a cycle if the thread holding our lease is blocked + // and waiting for (transitively) any lease that is held by this thread + auto wait_tid = record->second.tid; + while (1) { + if (wait_tid == tid) + return true; + if ((signed)Awaiting.size() <= wait_tid) + break; // no cycle, since it is running (and this should be unreachable) + auto key2 = Awaiting[wait_tid]; + if (key2.mi == nullptr) + break; // no cycle, since it is running + auto record2 = Reservations.find(key2); + if (record2 == Reservations.end()) + break; // no cycle, since it is about to resume + assert(wait_tid != record2->second.tid); + wait_tid = record2->second.tid; + } + Awaiting[tid] = key; + lock.wait(engine_wait); + Awaiting[tid] = InferKey{}; } - if ((signed)Awaiting.size() <= wait_tid) - break; // no cycle, since it is running (and this should be unreachable) - auto key2 = Awaiting[wait_tid]; - if (key2.mi == nullptr) - break; // no cycle, since it is running - auto record2 = Reservations.find(key2); - if (record2 == Reservations.end()) - break; // no cycle, since it is about to 
resume - assert(wait_tid != record2->second.tid); - wait_tid = record2->second.tid; - } - Awaiting[tid] = key; - engine_wait.wait(lock); - Awaiting[tid] = InferKey{}; - } + })()) + ct->ptls->engine_nqueued--; + JL_GC_POP(); + return ci; } int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner) { jl_task_t *ct = jl_current_task; InferKey key = {m, owner}; - std::unique_lock lock(engine_lock); + std::unique_lock lock(engine_lock); auto record = Reservations.find(key); return record != Reservations.end() && record->second.tid == jl_atomic_load_relaxed(&ct->tid); } @@ -123,7 +119,7 @@ STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) { - std::unique_lock lock(engine_lock); + std::unique_lock lock(engine_lock); bool any = false; for (auto I = Reservations.begin(); I != Reservations.end(); ++I) { jl_code_instance_t *ci = I->second.ci; @@ -142,7 +138,7 @@ void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src) { jl_task_t *ct = jl_current_task; - std::unique_lock lock(engine_lock); + std::unique_lock lock(engine_lock); auto record = Reservations.find(InferKey{ci->def, ci->owner}); if (record == Reservations.end() || record->second.ci != ci) return; @@ -152,7 +148,6 @@ void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src) engine_wait.notify_all(); } - #ifdef __cplusplus } #endif diff --git a/src/gc-common.c b/src/gc-common.c index ee461b576ea9e..b552afb8228f0 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -20,6 +20,11 @@ extern "C" { jl_gc_num_t gc_num = {0}; +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + // =========================================================================== // // GC Callbacks // =========================================================================== // @@ -485,10 +490,210 @@ JL_DLLEXPORT void jl_finalize(jl_value_t *o) int gc_n_threads; jl_ptls_t* 
gc_all_tls_states; +// =========================================================================== // +// Allocation +// =========================================================================== // + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. + +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// 
allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + +// =========================================================================== // +// Generic Memory +// =========================================================================== // + +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. + mallocmemory_t *ma; + if (ptls->gc_tls_common.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls_common.heap.mafreelist; + ptls->gc_tls_common.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls_common.heap.mallocarrays; + ptls->gc_tls_common.heap.mallocarrays = ma; +} + +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = 
(char*)m->usings.items; + len = module_usings_length(m); + elsize = sizeof(struct _jl_module_using); + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// =========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + // =========================================================================== // // MISC // =========================================================================== // +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 
0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..32b7470b13a58 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. +typedef struct _mallocmemory_t { + jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory + struct _mallocmemory_t *next; +} mallocmemory_t; + #if defined(_OS_WINDOWS_) STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { @@ -173,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + #endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index 5c150aba68e10..7c479484cde45 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,48 +1105,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = module_usings_length(m); - elsize = 
sizeof(struct _jl_module_using); - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index bb2abbe2d36ac..0b5df17a3b8c5 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -96,6 +96,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -130,6 +132,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); // Allocation // ========================================================================= // +// On GCC, this function is inlined when sz is constant (see julia_internal.h) +// In general, this function should implement allocation and should use the specific GC's logic +// to decide whether to allocate a small or a large object. Finally, note that this function +// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record +// an allocation of that type in the allocation profiler. 
+struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); + // Allocates small objects and increments Julia allocation counterst. Size of the object // header must be included in the object size. The (possibly unused in some implementations) // offset to the arena in which we're allocating is passed in the second parameter, and the @@ -157,26 +166,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. 
-JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -190,14 +179,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. 
The third and fourth diff --git a/src/gc-stacks.c b/src/gc-stacks.c index f6e787a4c1d2d..a2d3862dc9501 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -47,7 +47,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -82,7 +82,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -132,7 +132,7 @@ void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); return; } } @@ -161,7 +161,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); } } } @@ -176,7 +176,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id]; if (pool->len > 0) { stk = small_arraylist_pop(pool); } @@ -197,7 +197,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz 
= ssize; if (owner) { - small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks; mtarraylist_push(live_tasks, owner); } return stk; @@ -228,7 +228,7 @@ void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT // free half of stacks that remain unused since last sweep if (i == jl_atomic_load_relaxed(&gc_stack_free_idx)) { for (int p = 0; p < JL_N_STACK_POOLS; p++) { - small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p]; + small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; size_t n_to_free; if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { n_to_free = al->len; // not alive yet or dead, so it does not need these anymore @@ -251,10 +251,10 @@ void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT } } if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { - small_arraylist_free(ptls2->gc_tls.heap.free_stacks); + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = 0; size_t ndel = 0; size_t l = live_tasks->len; @@ -306,7 +306,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) jl_ptls_t ptls2 = allstates[i]; if (ptls2 == NULL) continue; - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); l += n + (ptls2->root_task->ctx.stkbuf != NULL); } @@ -325,7 +325,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) goto restart; jl_array_data(a,void*)[j++] = t; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); for (size_t i = 0; i < n; i++) { jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); diff --git a/src/gc-stock.c b/src/gc-stock.c index 
f60aa89e6b11d..541c5b4ecc5c2 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -363,7 +363,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr); + small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr); return wr; } @@ -373,8 +373,8 @@ static void clear_weak_refs(void) for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { - size_t n, l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; for (n = 0; n < l; n++) { jl_weakref_t *wr = (jl_weakref_t*)lst[n]; if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) @@ -392,8 +392,8 @@ static void sweep_weak_refs(void) if (ptls2 != NULL) { size_t n = 0; size_t ndel = 0; - size_t l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; if (l == 0) continue; while (1) { @@ -408,7 +408,7 @@ static void sweep_weak_refs(void) lst[n] = lst[n + ndel]; lst[n + ndel] = tmp; } - ptls2->gc_tls.heap.weak_refs.len -= ndel; + ptls2->gc_tls_common.heap.weak_refs.len -= ndel; } } } @@ -416,18 +416,18 @@ static void sweep_weak_refs(void) STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz; if (alloc_acc < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc); else { 
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); } } STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz); } // big value list @@ -448,10 +448,10 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); @@ -561,29 +561,11 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. 
- mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); jl_batch_accum_heap_size(ptls, sz); } @@ -602,18 +584,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - uint64_t alloc_acc = 
jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc); - freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -629,13 +611,13 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -655,17 +637,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const 
jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -689,8 +660,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays; - mallocmemory_t **pma = &ptls2->gc_tls.heap.mallocarrays; + mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays; + mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays; while (ma != NULL) { mallocmemory_t *nxt = ma->next; jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1); @@ -702,8 +673,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT *pma = nxt; int isaligned = (uintptr_t)ma->a & 1; jl_gc_free_memory(a, isaligned); - ma->next = ptls2->gc_tls.heap.mafreelist; - ptls2->gc_tls.heap.mafreelist = ma; + ma->next = ptls2->gc_tls_common.heap.mafreelist; + ptls2->gc_tls_common.heap.mafreelist = ma; } gc_time_count_mallocd_memory(bits); ma = nxt; @@ -764,12 +735,12 @@ STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset, return jl_gc_big_alloc(ptls, osize, NULL); #endif maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v != NULL) { @@ -824,6 +795,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) + v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -983,8 +977,8 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ // instead of adding it to the thread that originally allocated the page, so we can avoid // an atomic-fetch-add here. 
size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta); jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } @@ -1277,7 +1271,7 @@ static void gc_sweep_pool(void) } continue; } - jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; @@ -2841,34 +2835,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } - } - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -2879,11 +2847,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); 
} -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -2918,7 +2881,7 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) for (int i = 0; i < n_threads; i++) { jl_ptls_t ptls2 = all_tls_states[i]; if (ptls2 != NULL) { - pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes); } } return pool_live_bytes; @@ -3271,13 +3234,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); + jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls2->gc_tls.heap; - if (heap->weak_refs.len == 0) - small_arraylist_free(&heap->weak_refs); - if (heap->live_tasks.len == 0) - small_arraylist_free(&heap->live_tasks); + if (common_heap->weak_refs.len == 0) + small_arraylist_free(&common_heap->weak_refs); + if (common_heap->live_tasks.len == 0) + small_arraylist_free(&common_heap->live_tasks); if (heap->remset.len == 0) arraylist_free(&heap->remset); if (ptls2->finalizers.len == 0) @@ -3346,8 +3309,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, 
-(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -3449,16 +3412,10 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { + jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { @@ -3466,12 +3423,12 @@ void jl_init_thread_heap(jl_ptls_t ptls) p[i].freelist = NULL; p[i].newpages = NULL; } - small_arraylist_new(&heap->weak_refs, 0); - small_arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&common_heap->weak_refs, 0); + small_arraylist_new(&common_heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) - small_arraylist_new(&heap->free_stacks[i], 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; + small_arraylist_new(&common_heap->free_stacks[i], 0); + common_heap->mallocarrays = NULL; + common_heap->mafreelist = NULL; heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; @@ -3497,8 +3454,8 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&q->array, wsa2); arraylist_new(&mq->reclaim_set, 32); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); } 
void jl_free_thread_gc_state(jl_ptls_t ptls) @@ -3685,10 +3642,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz); } return data; @@ -3702,10 +3659,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz * nm); } return data; @@ -3730,10 +3687,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (!(sz < old)) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1); int64_t diff = sz - old; if (diff < 0) { @@ -3746,63 +3703,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. - -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3821,10 
+3721,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); @@ -3936,18 +3836,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4075,11 +3963,6 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc(ptls, sz, ty); -} - JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); diff --git a/src/gc-stock.h b/src/gc-stock.h index 76cecf68067bf..b9a2e720f120a 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. 
-typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h new file mode 100644 index 0000000000000..ba36f5c1c238e --- /dev/null +++ b/src/gc-tls-common.h @@ -0,0 +1,52 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_COMMON_H +#define JL_GC_TLS_COMMON_H + +#include "julia_atomics.h" + +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_common_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_common_t; + +typedef struct { + jl_thread_heap_common_t heap; + jl_thread_gc_num_common_t gc_num; +} jl_gc_tls_states_common_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_COMMON_H diff --git a/src/gc-tls.h b/src/gc-tls.h index 3c2cc029a6183..d82506383c501 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -21,16 +21,6 @@ typedef struct { } jl_gc_pool_t; typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live 
tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects struct _bigval_t *young_generation_of_bigvals; @@ -42,22 +32,8 @@ typedef struct { // variables for allocating objects from pools #define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h` jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; } jl_thread_heap_t; -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { ws_queue_t chunk_queue; ws_queue_t ptr_queue; @@ -78,7 +54,6 @@ typedef struct { typedef struct { jl_thread_heap_t heap; jl_gc_page_stack_t page_metadata_allocd; - jl_thread_gc_num_t gc_num; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; _Atomic(size_t) gc_sweeps_requested; diff --git a/src/genericmemory.c b/src/genericmemory.c index ea52fca66ba48..5c48e3202493e 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -45,7 +45,7 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is prod += nel; } if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: too large for system address width"); + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); size_t tot = (size_t)prod + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); int pooled = tot <= GC_MAX_SZCLASS; diff --git 
a/src/gf.c b/src/gf.c index fc2e62ebff96b..285942cd157c5 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2566,12 +2566,10 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila jl_static_show(s_precompile, mi->specTypes); jl_printf(s_precompile, ")"); if (is_recompile) { + jl_printf(s_precompile, " # recompile"); if (s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) { jl_printf(s_precompile, "\e[0m"); } - else { - jl_printf(s_precompile, " # recompile"); - } } jl_printf(s_precompile, "\n"); if (s_precompile != JL_STDERR) @@ -3222,6 +3220,23 @@ jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_v return tt; } +// undo jl_argtype_with_function transform +jl_value_t *jl_argtype_without_function(jl_value_t *ftypes) +{ + jl_value_t *types = jl_unwrap_unionall(ftypes); + size_t l = jl_nparams(types); + if (l == 1 && jl_is_vararg(jl_tparam0(types))) + return ftypes; + jl_value_t *tt = (jl_value_t*)jl_alloc_svec(l - 1); + JL_GC_PUSH1(&tt); + for (size_t i = 1; i < l; i++) + jl_svecset(tt, i - 1, jl_tparam(types, i)); + tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 0); + tt = jl_rewrap_unionall_(tt, types); + JL_GC_POP(); + return tt; +} + #ifdef JL_TRACE static int trace_en = 0; static int error_en = 1; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 313449dda5557..c8d8356687dcf 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -64,9 +64,6 @@ using namespace llvm; #define DEBUG_TYPE "julia_jitlayers" STATISTIC(LinkedGlobals, "Number of globals linked"); -STATISTIC(CompiledCodeinsts, "Number of codeinsts compiled directly"); -STATISTIC(MaxWorkqueueSize, "Maximum number of elements in the workqueue"); -STATISTIC(IndirectCodeinsts, "Number of dependent codeinsts compiled"); STATISTIC(SpecFPtrCount, "Number of specialized function pointers compiled"); STATISTIC(UnspecFPtrCount, "Number of specialized function pointers compiled"); STATISTIC(ModulesAdded, "Number of modules added to 
the JIT"); @@ -151,13 +148,6 @@ void jl_dump_llvm_opt_impl(void *s) **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s; } -#ifndef JL_USE_JITLINK -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - SmallVectorImpl &Stack) JL_NOTSAFEPOINT; -#endif static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT; void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT @@ -187,214 +177,536 @@ void jl_jit_globals(std::map &globals) JL_NOTSAFEPOINT } } -// this generates llvm code for the lambda info -// and adds the result to the jitlayers -// (and the shadow module), -// and generates code for it -static jl_callptr_t _jl_compile_codeinst( - jl_code_instance_t *codeinst, - jl_code_info_t *src, - orc::ThreadSafeContext context) + // lock for places where only single threaded behavior is implemented, so we need GC support +static jl_mutex_t jitlock; + // locks for adding external code to the JIT atomically +static std::mutex extern_c_lock; + // locks and barriers for this state +static std::mutex engine_lock; +static std::condition_variable engine_wait; +static int threads_in_compiler_phase; + // the TSM for each codeinst +static SmallVector sharedmodules; +static DenseMap emittedmodules; + // the invoke and specsig function names in the JIT +static DenseMap invokenames; + // everything that any thread wants to compile right now +static DenseSet compileready; + // everything that any thread has compiled recently +static DenseSet linkready; + // a map from a codeinst to the outgoing edges needed before linking it +static DenseMap> complete_graph; + // the state for each codeinst and the number of unresolved edges (we don't + // really need this once JITLink is available everywhere, since every module + // is automatically complete, and we can emit any required fixups later as a + // separate module) +static DenseMap> incompletemodules; + // the set of incoming unresolved edges resolved by a codeinstance 
+static DenseMap> incomplete_rgraph; + +// Lock hierarchy here: +// jitlock is outermost, can contain others and allows GC +// engine_lock is next +// ThreadSafeContext locks are next, they should not be nested (unless engine_lock is also held, but this may make TSAN sad anyways) +// extern_c_lock is next +// jl_ExecutionEngine internal locks are exclusive to this list, since OrcJIT promises to never hold a lock over a materialization unit: +// construct a query object from a query set and query handler +// lock the session +// lodge query against requested symbols, collect required materializers (if any) +// unlock the session +// dispatch materializers (if any) +// However, this guarantee relies on Julia releasing all TSC locks before causing any materialization units to be dispatched +// as materialization may need to acquire TSC locks. + + +static void finish_params(Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT { - // caller must hold codegen_lock - // and have disabled finalizers - uint64_t start_time = 0; - bool timed = !!*jl_ExecutionEngine->get_dump_compiles_stream(); - if (timed) - start_time = jl_hrtime(); + if (params._shared_module) { + sharedmodules.push_back(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + } + + // In imaging mode, we can't inline global variable initializers in order to preserve + // the fiction that we don't know what loads from the global will return. Thus, we + // need to emit a separate module for the globals before any functions are compiled, + // to ensure that the globals are defined when they are compiled. 
+ if (params.imaging_mode) { + if (!params.global_targets.empty()) { + void **globalslots = new void*[params.global_targets.size()]; + void **slot = globalslots; + for (auto &global : params.global_targets) { + auto GV = global.second; + *slot = global.first; + jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); + slot++; + } +#ifdef __clang_analyzer__ + static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it +#endif + } + } + else { + StringMap NewGlobals; + for (auto &global : params.global_targets) { + NewGlobals[global.second->getName()] = global.first; + } + for (auto &GV : M->globals()) { + auto InitValue = NewGlobals.find(GV.getName()); + if (InitValue != NewGlobals.end()) { + jl_link_global(&GV, InitValue->second); + } + } + } +} - assert(jl_is_code_instance(codeinst)); - JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); - jl_callptr_t fptr = NULL; - // emit the code in LLVM IR form - jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context - params.cache = true; - params.imaging_mode = imaging_default(); - params.debug_level = jl_options.debug_level; - { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.DL, params.TargetTriple); - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); - if (result_m) - params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; - jl_compile_workqueue(params, CompilationPolicy::Default); - - if (params._shared_module) { - jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); - jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); +static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t ¶ms, bool forceall=false) JL_NOTSAFEPOINT_LEAVE 
JL_NOTSAFEPOINT_ENTER +{ + jl_task_t *ct = jl_current_task; + decltype(params.workqueue) edges; + std::swap(params.workqueue, edges); + for (auto &it : edges) { + jl_code_instance_t *codeinst = it.first; + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + jl_callptr_t invoke = nullptr; + bool isedge = false; + assert(params.cache); + // Checking the cache here is merely an optimization and not strictly required + // But it must be consistent with the following invokenames lookup, which is protected by the engine_lock + uint8_t specsigflags; + void *fptr; + jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); + //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) + if (invoke == jl_fptr_args_addr) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); } - - // In imaging mode, we can't inline global variable initializers in order to preserve - // the fiction that we don't know what loads from the global will return. Thus, we - // need to emit a separate module for the globals before any functions are compiled, - // to ensure that the globals are defined when they are compiled. 
- if (params.imaging_mode) { - // Won't contain any PLT/dlsym calls, so no need to optimize those - if (!params.global_targets.empty()) { - void **globalslots = new void*[params.global_targets.size()]; - void **slot = globalslots; - for (auto &global : params.global_targets) { - auto GV = global.second; - *slot = global.first; - jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); - slot++; + else if (specsigflags & 0b1) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); + preal_specsig = true; + } + bool force = forceall || invoke != nullptr; + if (preal_decl.empty()) { + auto it = invokenames.find(codeinst); + if (it != invokenames.end()) { + auto &decls = it->second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + isedge = true; } -#ifdef __clang_analyzer__ - static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it -#endif + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + isedge = true; + } + force = true; } } - else { - StringMap NewGlobals; - for (auto &global : params.global_targets) { - NewGlobals[global.second->getName()] = global.first; + if (!preal_decl.empty() || force) { + // if we have a prototype emitted, compare it to what we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invoke != nullptr && invokeName.empty()) { + assert(invoke != jl_fptr_args_addr); + if (invoke == jl_fptr_sparam_addr) + invokeName = "jl_fptr_sparam"; + else if (invoke == jl_f_opaque_closure_call_addr) + invokeName = "jl_f_opaque_closure_call"; + else + invokeName = 
jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + isedge = false; } - for (auto &def : params.compiled_functions) { - auto M = std::get<0>(def.second).getModuleUnlocked(); - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); - } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + // TODO: maybe this can be cached in codeinst->specfptr? + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls) + jl_method_instance_t *mi = codeinst->def; + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0); + jl_gc_unsafe_leave(ct->ptls, gc_state); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); } } - } - -#ifndef JL_USE_JITLINK - // Collect the exported functions from the params.compiled_functions modules, - // which form dependencies on which functions need to be - // compiled first. 
Cycles of functions are compiled together. - // (essentially we compile a DAG of SCCs in reverse topological order, - // if we treat declarations of external functions as edges from declaration - // to definition) - StringMap NewExports; - for (auto &def : params.compiled_functions) { - orc::ThreadSafeModule &TSM = std::get<0>(def.second); - //The underlying context object is still locked because params is not destroyed yet - auto M = TSM.getModuleUnlocked(); - jl_ExecutionEngine->optimizeDLSyms(*M); - for (auto &F : M->global_objects()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports[F.getName()] = &TSM; + if (proto.oc) { // additionally, if we are dealing with an OC constructor, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + if (invoke != nullptr && ocinvokeDecl.empty()) { + // check for some special tokens used by opaque_closure.c and convert those to their real functions + assert(invoke != jl_fptr_args_addr); + assert(invoke != jl_fptr_sparam_addr); + if (invoke == jl_fptr_interpret_call_addr) + ocinvokeDecl = "jl_fptr_interpret_call"; + else if (invoke == jl_fptr_const_return_addr) + ocinvokeDecl = "jl_fptr_const_return"; + else if (invoke == jl_f_opaque_closure_call_addr) + ocinvokeDecl = "jl_f_opaque_closure_call"; + //else if (invoke == jl_interpret_opaque_closure_addr) + else + ocinvokeDecl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") { + if (pinvoke == nullptr) + ocinvokeDecl = get_or_emit_fptr1(preal_decl, mod)->getName(); + else + ocinvokeDecl = 
pinvoke->getName(); + } + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); } } } - DenseMap Queued; - SmallVector Stack; - for (auto &def : params.compiled_functions) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports, Queued, Stack); - assert(Queued.empty() && Stack.empty() && !M); + else { + isedge = true; + params.workqueue.push_back(it); + incomplete_rgraph[codeinst].push_back(callee); } -#else - for (auto &def : params.compiled_functions) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - if (M) - jl_ExecutionEngine->addModule(std::move(M)); + if (isedge) + complete_graph[callee].push_back(codeinst); + } + return params.workqueue.size(); +} + +// test whether codeinst->invoke is usable already without further compilation needed +static bool jl_is_compiled_codeinst(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT +{ + auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (invoke == nullptr || invoke == jl_fptr_wait_for_compiled_addr) + return false; + return true; +} + +// move codeinst (and deps) from incompletemodules to emitted modules +// and populate compileready from complete_graph +static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + SmallVector workqueue; + workqueue.push_back(codeinst); + while (!workqueue.empty()) { + codeinst = workqueue.pop_back_val(); + if (!invokenames.count(codeinst)) { + // this means it should be compiled already while the callee was in stasis + assert(jl_is_compiled_codeinst(codeinst)); + continue; } -#endif - ++CompiledCodeinsts; - 
MaxWorkqueueSize.updateMax(params.compiled_functions.size()); - IndirectCodeinsts += params.compiled_functions.size() - 1; - } - - // batch compile job for all new functions - SmallVector NewDefs; - for (auto &def : params.compiled_functions) { - jl_llvm_functions_t &decls = std::get<1>(def.second); - if (decls.functionObject != "jl_fptr_args" && - decls.functionObject != "jl_fptr_sparam" && - decls.functionObject != "jl_f_opaque_closure_call") - NewDefs.push_back(decls.functionObject); - if (!decls.specFunctionObject.empty()) - NewDefs.push_back(decls.specFunctionObject); - } - auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); - - size_t i = 0; - size_t nextaddr = 0; - for (auto &def : params.compiled_functions) { - jl_code_instance_t *this_code = def.first; - if (i < jl_timing_print_limit) - jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK); - - jl_llvm_functions_t &decls = std::get<1>(def.second); - jl_callptr_t addr; - bool isspecsig = false; - if (decls.functionObject == "jl_fptr_args") { - addr = jl_fptr_args_addr; + // if this was incomplete, force completion now of it + auto it = incompletemodules.find(codeinst); + if (it != incompletemodules.end()) { + int waiting = 0; + auto &edges = complete_graph[codeinst]; + auto edges_end = std::remove_if(edges.begin(), edges.end(), [&waiting, codeinst] (jl_code_instance_t *edge) JL_NOTSAFEPOINT -> bool { + auto &redges = incomplete_rgraph[edge]; + // waiting += std::erase(redges, codeinst); + auto redges_end = std::remove(redges.begin(), redges.end(), codeinst); + if (redges_end != redges.end()) { + waiting += redges.end() - redges_end; + redges.erase(redges_end, redges.end()); + assert(!invokenames.count(edge)); + } + return !invokenames.count(edge); + }); + edges.erase(edges_end, edges.end()); + assert(waiting == std::get<1>(it->second)); + std::get<1>(it->second) = 0; + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + waiting = 
jl_analyze_workqueue(codeinst, params, true); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[codeinst].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); + } + // and then indicate this should be compiled now + if (!linkready.count(codeinst) && compileready.insert(codeinst).second) { + auto edges = complete_graph.find(codeinst); + if (edges != complete_graph.end()) { + workqueue.append(edges->second); + } + } + } +} + +// notify any other pending work that this edge now has code defined +static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + auto notify = incomplete_rgraph.find(edge); + if (notify == incomplete_rgraph.end()) + return; + auto redges = std::move(notify->second); + incomplete_rgraph.erase(notify); + for (size_t i = 0; i < redges.size(); i++) { + jl_code_instance_t *callee = redges[i]; + auto it = incompletemodules.find(callee); + assert(it != incompletemodules.end()); + if (--std::get<1>(it->second) == 0) { + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + assert(callee == it->first); + int waiting = jl_analyze_workqueue(callee, params); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[callee].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); } - else if (decls.functionObject == "jl_fptr_sparam") { - addr = jl_fptr_sparam_addr; + } +} + + +// set the invoke field for codeinst (and all deps, and assist with other pending work from other threads) now +static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) +{ + jl_unique_gcsafe_lock lock(engine_lock); + if (!invokenames.count(codeinst)) + return; + threads_in_compiler_phase++; + prepare_compile(codeinst); // may safepoint + while (1) { + // TODO: split up this work by ThreadSafeContext, so two threads don't need to get the same locks and stall + if (!sharedmodules.empty()) { + auto TSM = 
sharedmodules.pop_back_val(); + lock.native.unlock(); + { + auto Lock = TSM.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); + lock.native.lock(); } - else if (decls.functionObject == "jl_f_opaque_closure_call") { - addr = jl_f_opaque_closure_call_addr; + else if (!compileready.empty()) { + // move a function from compileready to linkready then compile it + auto compilenext = compileready.begin(); + codeinst = *compilenext; + compileready.erase(compilenext); + auto TSMref = emittedmodules.find(codeinst); + assert(TSMref != emittedmodules.end()); + auto TSM = std::move(TSMref->second); + linkready.insert(codeinst); + emittedmodules.erase(TSMref); + lock.native.unlock(); + uint64_t start_time = jl_hrtime(); + { + auto Lock = TSM.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint + // If logging of the compilation stream is enabled, + // then dump the method-instance specialization type to the stream + jl_method_instance_t *mi = codeinst->def; + if (jl_is_method(mi->def.method)) { + auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); + if (stream) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); + jl_static_show((JL_STREAM*)stream, mi->specTypes); + ios_printf(stream, "\"\n"); + } + } + lock.native.lock(); } else { - assert(NewDefs[nextaddr] == decls.functionObject); - addr = (jl_callptr_t)Addrs[nextaddr++]; - assert(addr); - isspecsig = true; + break; } - if (!decls.specFunctionObject.empty()) { - void *prev_specptr = NULL; - assert(NewDefs[nextaddr] == decls.specFunctionObject); - void *spec = (void*)Addrs[nextaddr++]; - assert(spec); - if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { - // only set specsig and invoke if we were the first to set 
specptr - jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); - // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr - // either assumes that specptr was null, doesn't care about specptr, - // or will wait until specsigflags has 0b10 set before reloading invoke - jl_atomic_store_release(&this_code->invoke, addr); - jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); - } else { - //someone else beat us, don't commit any results - while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { - jl_cpu_pause(); + } + codeinst = nullptr; + // barrier until all threads have finished calling addModule + if (--threads_in_compiler_phase == 0) { + // the last thread out will finish linking everything + // then release all of the other threads + // move the function pointers out from invokenames to the codeinst + + // batch compile job for all new functions + SmallVector NewDefs; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + assert(!decls.functionObject.empty()); + if (decls.functionObject != "jl_fptr_args" && + decls.functionObject != "jl_fptr_sparam" && + decls.functionObject != "jl_f_opaque_closure_call") + NewDefs.push_back(decls.functionObject); + if (!decls.specFunctionObject.empty()) + NewDefs.push_back(decls.specFunctionObject); + } + auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); + + size_t nextaddr = 0; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + jl_callptr_t addr; + bool isspecsig = false; + if (decls.functionObject == "jl_fptr_args") { + addr = jl_fptr_args_addr; + } + else if (decls.functionObject == "jl_fptr_sparam") { + addr = jl_fptr_sparam_addr; + } + else if (decls.functionObject == "jl_f_opaque_closure_call") { + addr = 
jl_f_opaque_closure_call_addr; + } + else { + assert(NewDefs[nextaddr] == decls.functionObject); + addr = (jl_callptr_t)Addrs[nextaddr++]; + assert(addr); + isspecsig = true; + } + if (!decls.specFunctionObject.empty()) { + void *prev_specptr = nullptr; + assert(NewDefs[nextaddr] == decls.specFunctionObject); + void *spec = (void*)Addrs[nextaddr++]; + assert(spec); + if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { + // only set specsig and invoke if we were the first to set specptr + jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); + // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr + // either assumes that specptr was null, doesn't care about specptr, + // or will wait until specsigflags has 0b10 set before reloading invoke + jl_atomic_store_release(&this_code->invoke, addr); + jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); + } + else { + //someone else beat us, don't commit any results + while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + addr = jl_atomic_load_relaxed(&this_code->invoke); } - addr = jl_atomic_load_relaxed(&this_code->invoke); } - } else { - jl_callptr_t prev_invoke = NULL; - // Allow replacing addr if it is either NULL or our special waiting placeholder. - if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - addr = prev_invoke; - //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other - //known lesser function) + else { + jl_callptr_t prev_invoke = nullptr; + // Allow replacing addr if it is either nullptr or our special waiting placeholder. 
+ if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + addr = prev_invoke; + //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other + //known lesser function) + } } } + invokenames.erase(it); + complete_graph.erase(this_code); } - if (this_code == codeinst) - fptr = addr; - i++; + linkready.clear(); + engine_wait.notify_all(); + } + else while (threads_in_compiler_phase) { + lock.wait(engine_wait); } - if (i > jl_timing_print_limit) - jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... <%d methods truncated>", i - 10); +} - uint64_t end_time = 0; - if (timed) - end_time = jl_hrtime(); - - // If logging of the compilation stream is enabled, - // then dump the method-instance specialization type to the stream - jl_method_instance_t *mi = codeinst->def; - if (jl_is_method(mi->def.method)) { - auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); - if (stream) { - ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); - jl_static_show((JL_STREAM*)stream, mi->specTypes); - ios_printf(stream, "\"\n"); +static void jl_emit_codeinst_to_jit( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + { // lock scope + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; + } + JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); + // emit the code in LLVM IR form to the new context + jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context + params.cache = true; + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; + orc::ThreadSafeModule result_m = + jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.DL, params.TargetTriple); + jl_llvm_functions_t decls = 
jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints + if (!result_m) + return; + { // drop lock before acquiring engine_lock + auto release = std::move(params.tsctx_lock); + } + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; // destroy everything + invokenames[codeinst] = std::move(decls); + complete_emit(codeinst); + params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock + int waiting = jl_analyze_workqueue(codeinst, params); + if (waiting) { + auto release = std::move(params.tsctx_lock); // unlock again before moving from it + incompletemodules.insert(std::pair(codeinst, std::make_tuple(std::move(params), waiting))); + } + else { + finish_params(result_m.getModuleUnlocked(), params); + } + emittedmodules[codeinst] = std::move(result_m); +} + +static void recursive_compile_graph( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + jl_emit_codeinst_to_jit(codeinst, src); + DenseSet Seen; + SmallVector workqueue; + workqueue.push_back(codeinst); + // if any edges were incomplete, try to complete them now + while (!workqueue.empty()) { + auto this_code = workqueue.pop_back_val(); + if (Seen.insert(this_code).second) { + if (this_code != codeinst) + jl_emit_codeinst_to_jit(this_code, nullptr); // contains safepoints + jl_unique_gcsafe_lock lock(engine_lock); + auto edges = complete_graph.find(this_code); + if (edges != complete_graph.end()) { + workqueue.append(edges->second); + } } } - return fptr; } +// this generates llvm code for the lambda info +// and adds the result to the jitlayers +// (and the shadow module), +// and generates code for it +static jl_callptr_t _jl_compile_codeinst( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + recursive_compile_graph(codeinst, src); + jl_compile_codeinst_now(codeinst); + return jl_atomic_load_acquire(&codeinst->invoke); +} + + const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void 
*sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms); // compile a C-callable alias @@ -415,42 +727,40 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * orc::ThreadSafeModule backing; if (into == NULL) { if (!pparams) { - ctx = jl_ExecutionEngine->acquireContext(); + ctx = jl_ExecutionEngine->makeContext(); } backing = jl_create_ts_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->DL : jl_ExecutionEngine->getDataLayout(), pparams ? pparams->TargetTriple : jl_ExecutionEngine->getTargetTriple()); into = &backing; } - auto target_info = into->withModuleDo([&](Module &M) { - return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); - }); - jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second)); - params.imaging_mode = imaging_default(); - params.debug_level = jl_options.debug_level; - if (pparams == NULL) - pparams = ¶ms; - assert(pparams->tsctx.getContext() == into->getContext().getContext()); - const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams); bool success = true; - if (!sysimg) { - JL_LOCK(&jl_ExecutionEngine->jitlock); - if (jl_ExecutionEngine->getGlobalValueAddress(name)) { - success = false; - } - if (success && p == NULL) { - jl_jit_globals(params.global_targets); - assert(params.workqueue.empty()); - if (params._shared_module) { - jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); - jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + { + auto Lock = into->getContext().getLock(); + Module *M = into->getModuleUnlocked(); + jl_codegen_params_t params(into->getContext(), M->getDataLayout(), Triple(M->getTargetTriple())); + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; + if (pparams == NULL) + pparams = ¶ms; + assert(pparams->tsctx.getContext() == into->getContext().getContext()); + 
const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams); + if (!sysimg) { + jl_unique_gcsafe_lock lock(extern_c_lock); + if (jl_ExecutionEngine->getGlobalValueAddress(name)) { + success = false; + } + if (success && p == NULL) { + jl_jit_globals(params.global_targets); + assert(params.workqueue.empty()); + if (params._shared_module) { + jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); // safepoint + jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + } + } + if (success && llvmmod == NULL) { + jl_ExecutionEngine->optimizeDLSyms(*M); // safepoint + jl_ExecutionEngine->addModule(std::move(*into)); } } - if (success && llvmmod == NULL) { - into->withModuleDo([&](Module &M) { - jl_ExecutionEngine->optimizeDLSyms(M); - }); - jl_ExecutionEngine->addModule(std::move(*into)); - } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC } if (timed) { if (measure_compile_time_enabled) { @@ -459,9 +769,6 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * } ct->reentrant_timing &= ~1ull; } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return success; } @@ -512,18 +819,13 @@ extern "C" JL_DLLEXPORT_CODEGEN int jl_compile_codeinst_impl(jl_code_instance_t *ci) { int newly_compiled = 0; - if (jl_atomic_load_relaxed(&ci->invoke) != NULL) { - return newly_compiled; - } - JL_LOCK(&jl_ExecutionEngine->jitlock); if (jl_atomic_load_relaxed(&ci->invoke) == NULL) { ++SpecFPtrCount; uint64_t start = jl_typeinf_timing_begin(); - _jl_compile_codeinst(ci, NULL, *jl_ExecutionEngine->getContext()); + _jl_compile_codeinst(ci, NULL); jl_typeinf_timing_end(start, 0); newly_compiled = 1; } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC return newly_compiled; } @@ -541,38 +843,39 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) uint8_t measure_compile_time_enabled = 
jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - JL_LOCK(&jl_ExecutionEngine->jitlock); - if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - jl_method_t *def = unspec->def->def.method; - if (jl_is_method(def)) { - src = (jl_code_info_t*)def->source; - if (src && (jl_value_t*)src != jl_nothing) - src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); - } - else { - jl_method_instance_t *mi = unspec->def; - jl_code_instance_t *uninferred = jl_cached_uninferred( - jl_atomic_load_relaxed(&mi->cache), 1); - assert(uninferred); - src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); - assert(src); - } - if (src) { + jl_code_info_t *src = NULL; + JL_GC_PUSH1(&src); + jl_method_t *def = unspec->def->def.method; + if (jl_is_method(def)) { + src = (jl_code_info_t*)def->source; + if (src && (jl_value_t*)src != jl_nothing) + src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); + } + else { + jl_method_instance_t *mi = unspec->def; + jl_code_instance_t *uninferred = jl_cached_uninferred( + jl_atomic_load_relaxed(&mi->cache), 1); + assert(uninferred); + src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); + assert(src); + } + if (src) { + // TODO: first prepare recursive_compile_graph(unspec, src) before taking this lock to avoid recursion? + JL_LOCK(&jitlock); // TODO: use a better lock + if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { assert(jl_is_code_info(src)); ++UnspecFPtrCount; jl_debuginfo_t *debuginfo = src->debuginfo; jl_atomic_store_release(&unspec->debuginfo, debuginfo); // n.b. 
this assumes the field was previously NULL, which is not entirely true jl_gc_wb(unspec, debuginfo); - _jl_compile_codeinst(unspec, src, *jl_ExecutionEngine->getContext()); + _jl_compile_codeinst(unspec, src); } - jl_callptr_t null = nullptr; - // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort - jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); - JL_GC_POP(); + JL_UNLOCK(&jitlock); // Might GC } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC + JL_GC_POP(); + jl_callptr_t null = nullptr; + // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort + jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); if (timed) { if (measure_compile_time_enabled) { auto end = jl_hrtime(); @@ -634,8 +937,8 @@ static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT static constexpr size_t N_optlevels = 4; -static Expected selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { - TSM.withModuleDo([](Module &M) { +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT { size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); do { if (jl_generating_output()) { @@ -661,7 +964,10 @@ static Expected selectOptLevel(orc::ThreadSafeModule TSM, opt_level = std::min(opt_level, N_optlevels - 1); M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); }); - return std::move(TSM); + return TSM; +} +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return selectOptLevel(std::move(TSM)); } void jl_register_jit_object(const object::ObjectFile &debugObj, @@ -699,8 +1005,8 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { { std::lock_guard lock(PluginMutex); 
assert(PendingObjs.count(&MR) == 0); - PendingObjs[&MR] = std::unique_ptr( - new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}}); + PendingObjs[&MR] = std::unique_ptr(new JITObjectInfo{ + std::move(NewBuffer), std::move(NewObj), {}}); } } @@ -870,7 +1176,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. -std::unique_ptr createJITLinkMemoryManager() { +std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT { #if JL_LLVM_VERSION < 160000 return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); #else @@ -900,7 +1206,7 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { } }; -RTDyldMemoryManager* createRTDyldMemoryManager(void); +RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT; // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { @@ -909,7 +1215,10 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { public: ForwardingMemoryManager(std::shared_ptr MemMgr) : MemMgr(MemMgr) {} - virtual ~ForwardingMemoryManager() = default; + ForwardingMemoryManager(ForwardingMemoryManager &) = delete; + virtual ~ForwardingMemoryManager() { + assert(!MemMgr); + } virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName) override { @@ -947,7 +1256,11 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { return MemMgr->deregisterEHFrames(); } virtual bool finalizeMemory(std::string *ErrMsg = nullptr) override { - return MemMgr->finalizeMemory(ErrMsg); + bool b = false; + if (MemMgr.use_count() == 2) + b = MemMgr->finalizeMemory(ErrMsg); + MemMgr.reset(); + return b; } virtual void notifyObjectLoaded(RuntimeDyld &RTDyld, const object::ObjectFile &Obj) override { @@ -955,10 +1268,10 @@ class 
ForwardingMemoryManager : public RuntimeDyld::MemoryManager { } }; - -void registerRTDyldJITObject(const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &L, - const std::shared_ptr &MemMgr) +#ifndef JL_USE_JITLINK +static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR, + const object::ObjectFile &Object, + const RuntimeDyld::LoadedObjectInfo &L) { StringMap loadedSections; for (const object::SectionRef &lSection : Object.sections()) { @@ -980,6 +1293,8 @@ void registerRTDyldJITObject(const object::ObjectFile &Object, auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op. jl_register_jit_object(DebugObject.getBinary() ? *DebugObject.getBinary() : Object, getLoadAddress); } +#endif + namespace { static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); @@ -1078,9 +1393,6 @@ namespace { fixupTM(*TM); return std::unique_ptr(TM); } -} // namespace - -namespace { typedef NewPM PassManager; @@ -1131,14 +1443,14 @@ namespace { }; template - struct OptimizerT { - OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { + struct sizedOptimizerT { + sizedOptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { for (size_t i = 0; i < N; i++) { PMs[i] = std::make_unique>>(PMCreator(TM, i, printers, llvm_printing_mutex)); } } - OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); assert(PoolIdx < N && "Invalid optimization pool index"); @@ -1243,12 +1555,23 @@ namespace { llvm_unreachable("optlevel is 
between 0 and 3!"); } }); - return Expected{std::move(TSM)}; + return TSM; } private: std::array>>, N> PMs; }; + // shim for converting a unique_ptr to a TransformFunction to a TransformFunction + template + struct IRTransformRef { + IRTransformRef(T &transform) : transform(transform) {} + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return transform(std::move(TSM), R); + } + private: + T &transform; + }; + template struct CompilerT : orc::IRCompileLayer::IRCompiler { @@ -1264,7 +1587,8 @@ namespace { size_t PoolIdx; if (auto opt_level = M.getModuleFlag("julia.optlevel")) { PoolIdx = cast(cast(opt_level)->getValue())->getZExtValue(); - } else { + } + else { PoolIdx = jl_options.opt_level; } assert(PoolIdx < N && "Invalid optimization level for compiler!"); @@ -1273,74 +1597,89 @@ namespace { std::array>>, N> TMs; }; +} - struct JITPointersT { - - JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT - : SharedBytes(SharedBytes), Lock(Lock) {} +struct JuliaOJIT::OptimizerT { + OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) + : opt(TM, printers, llvm_printing_mutex) {} + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } +private: + struct sizedOptimizerT opt; +}; - Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - std::lock_guard locked(Lock); - for (auto &GV : make_early_inc_range(M.globals())) { - if (auto *Shared = getSharedBytes(GV)) { - ++InternedGlobals; - GV.replaceAllUsesWith(Shared); - GV.eraseFromParent(); - } +struct JuliaOJIT::JITPointersT { + JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) 
JL_NOTSAFEPOINT + : SharedBytes(SharedBytes), Lock(Lock) {} + + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); } + } - // Windows needs some inline asm to help - // build unwind tables, if they have any functions to decorate - if (!M.functions().empty()) - jl_decorate_module(M); - }); - return std::move(TSM); - } + // Windows needs some inline asm to help + // build unwind tables, if they have any functions to decorate + if (!M.functions().empty()) + jl_decorate_module(M); + }); + return TSM; + } + Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return operator()(std::move(TSM)); + } - private: - // optimize memory by turning long strings into memoized copies, instead of - // making a copy per object file of output. - // we memoize them using a StringSet with a custom-alignment allocator - // to ensure they are properly aligned - Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { - // We could probably technically get away with - // interning even external linkage globals, - // as long as they have global unnamedaddr, - // but currently we shouldn't be emitting those - // except in imaging mode, and we don't want to - // do this optimization there. 
- if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { - return nullptr; - } - if (!GV.hasInitializer()) { - return nullptr; - } - if (!GV.isConstant()) { - return nullptr; - } - auto CDS = dyn_cast(GV.getInitializer()); - if (!CDS) { - return nullptr; - } - StringRef Data = CDS->getRawDataValues(); - if (Data.size() < 16) { - // Cutoff, since we don't want to intern small strings - return nullptr; - } - Align Required = GV.getAlign().valueOrOne(); - Align Preferred = MaxAlignedAlloc::alignment(Data.size()); - if (Required > Preferred) - return nullptr; - StringRef Interned = SharedBytes.insert(Data).first->getKey(); - assert(llvm::isAddrAligned(Preferred, Interned.data())); - return literal_static_pointer_val(Interned.data(), GV.getType()); +private: + // optimize memory by turning long strings into memoized copies, instead of + // making a copy per object file of output. + // we memoize them using a StringSet with a custom-alignment allocator + // to ensure they are properly aligned + Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { + // We could probably technically get away with + // interning even external linkage globals, + // as long as they have global unnamedaddr, + // but currently we shouldn't be emitting those + // except in imaging mode, and we don't want to + // do this optimization there. 
+ if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { + return nullptr; } + if (!GV.hasInitializer()) { + return nullptr; + } + if (!GV.isConstant()) { + return nullptr; + } + auto CDS = dyn_cast(GV.getInitializer()); + if (!CDS) { + return nullptr; + } + StringRef Data = CDS->getRawDataValues(); + if (Data.size() < 16) { + // Cutoff, since we don't want to intern small strings + return nullptr; + } + Align Required = GV.getAlign().valueOrOne(); + Align Preferred = MaxAlignedAlloc::alignment(Data.size()); + if (Required > Preferred) + return nullptr; + StringRef Interned = SharedBytes.insert(Data).first->getKey(); + assert(llvm::isAddrAligned(Preferred, Interned.data())); + return literal_static_pointer_val(Interned.data(), GV.getType()); + } - SharedBytesT &SharedBytes; - std::mutex &Lock; - }; -} + SharedBytesT &SharedBytes; + std::mutex &Lock; +}; struct JuliaOJIT::DLSymOptimizer { @@ -1362,20 +1701,24 @@ struct JuliaOJIT::DLSymOptimizer { #undef INIT_RUNTIME_LIBRARY } + ~DLSymOptimizer() JL_NOTSAFEPOINT = default; - void *lookup_symbol(void *libhandle, const char *fname) { + void *lookup_symbol(void *libhandle, const char *fname) JL_NOTSAFEPOINT { void *addr; jl_dlsym(libhandle, fname, &addr, 0); return addr; } - void *lookup(const char *libname, const char *fname) { + void *lookup(const char *libname, const char *fname) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { StringRef lib(libname); StringRef f(fname); std::lock_guard lock(symbols_mutex); auto uit = user_symbols.find(lib); if (uit == user_symbols.end()) { + jl_task_t *ct = jl_current_task; + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); void *handle = jl_get_library_(libname, 0); + jl_gc_unsafe_leave(ct->ptls, gc_state); if (!handle) return nullptr; uit = user_symbols.insert(std::make_pair(lib, std::make_pair(handle, StringMap()))).first; @@ -1390,7 +1733,7 @@ struct JuliaOJIT::DLSymOptimizer { return handle; } - void *lookup(uintptr_t libidx, const char *fname) { + void *lookup(uintptr_t 
libidx, const char *fname) JL_NOTSAFEPOINT { std::lock_guard lock(symbols_mutex); runtime_symbols.resize(std::max(runtime_symbols.size(), libidx + 1)); auto it = runtime_symbols[libidx].second.find(fname); @@ -1402,7 +1745,7 @@ struct JuliaOJIT::DLSymOptimizer { return handle; } - void operator()(Module &M) { + void operator()(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { for (auto &GV : M.globals()) { auto Name = GV.getName(); if (Name.starts_with("jlplt") && Name.ends_with("got")) { @@ -1518,7 +1861,7 @@ struct JuliaOJIT::DLSymOptimizer { bool named; }; -void optimizeDLSyms(Module &M) { +void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { JuliaOJIT::DLSymOptimizer(true)(M); } @@ -1552,10 +1895,6 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { return jl_data_layout; } -#ifdef _COMPILER_ASAN_ENABLED_ -int64_t ___asan_globals_registered; -#endif - JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), @@ -1564,34 +1903,27 @@ JuliaOJIT::JuliaOJIT() JD(ES.createBareJITDylib("JuliaOJIT")), ExternalJD(ES.createBareJITDylib("JuliaExternal")), DLSymOpt(std::make_unique(false)), - ContextPool([](){ - auto ctx = std::make_unique(); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(*ctx); - #endif - return orc::ThreadSafeContext(std::move(ctx)); - }), #ifdef JL_USE_JITLINK MemMgr(createJITLinkMemoryManager()), ObjectLayer(ES, *MemMgr), - CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), #else MemMgr(createRTDyldMemoryManager()), - ObjectLayer( + UnlockedObjectLayer( ES, [this]() { std::unique_ptr result(new ForwardingMemoryManager(MemMgr)); return result; } ), - LockLayer(ObjectLayer), - CompileLayer(ES, LockLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + ObjectLayer(UnlockedObjectLayer), #endif - JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, 
RLST_mutex))), - OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers, llvm_printing_mutex))), - OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)) + CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + JITPointers(std::make_unique(SharedBytes, RLST_mutex)), + JITPointersLayer(ES, CompileLayer, IRTransformRef(*JITPointers)), + Optimizers(std::make_unique(*TM, PrintLLVMTimers, llvm_printing_mutex)), + OptimizeLayer(ES, JITPointersLayer, IRTransformRef(*Optimizers)), + OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)) { - JL_MUTEX_INIT(&this->jitlock, "JuliaOJIT"); #ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) // When dynamically linking against LLVM, use our custom EH frame registration code @@ -1606,12 +1938,7 @@ JuliaOJIT::JuliaOJIT() ObjectLayer.addPlugin(std::make_unique()); ObjectLayer.addPlugin(std::make_unique(jit_bytes_size)); #else - ObjectLayer.setNotifyLoaded( - [this](orc::MaterializationResponsibility &MR, - const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &LO) { - registerRTDyldJITObject(Object, LO, MemMgr); - }); + UnlockedObjectLayer.setNotifyLoaded(registerRTDyldJITObject); #endif std::string ErrorStr; @@ -1741,19 +2068,34 @@ JuliaOJIT::JuliaOJIT() #endif cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt))); #endif +#if JL_LLVM_VERSION < 190000 #ifdef _COMPILER_ASAN_ENABLED_ + // this is a hack to work around a bad assertion: + // /workspace/srcdir/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp:3028: llvm::Error llvm::orc::ExecutionSession::OL_notifyResolved(llvm::orc::MaterializationResponsibility&, const SymbolMap&): Assertion `(KV.second.getFlags() & ~JITSymbolFlags::Common) == (I->second & ~JITSymbolFlags::Common) && "Resolving symbol with incorrect flags"' failed. 
+ // hopefully fixed upstream by e7698a13e319a9919af04d3d693a6f6ea7168a44 + static int64_t jl___asan_globals_registered; orc::SymbolMap asan_crt; #if JL_LLVM_VERSION >= 170000 - asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&___asan_globals_registered), JITSymbolFlags::Exported}; + asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&jl___asan_globals_registered), JITSymbolFlags::Common | JITSymbolFlags::Exported}; #else - asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported); + asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&jl___asan_globals_registered, JITSymbolFlags::Common | JITSymbolFlags::Exported); #endif cantFail(JD.define(orc::absoluteSymbols(asan_crt))); #endif +#endif } JuliaOJIT::~JuliaOJIT() = default; +ThreadSafeContext JuliaOJIT::makeContext() +{ + auto ctx = std::make_unique(); + #if JL_LLVM_VERSION < 170000 + SetOpaquePointer(*ctx); + #endif + return orc::ThreadSafeContext(std::move(ctx)); +} + orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) { std::string MangleName = getMangledName(Name); @@ -1773,40 +2115,32 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) { JL_TIMING(LLVM_JIT, JIT_Total); ++ModulesAdded; -#ifndef JL_USE_JITLINK - orc::SymbolLookupSet NewExports; - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - for (auto &F : M.global_values()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Name = ES.intern(getMangledName(F.getName())); - NewExports.add(std::move(Name)); - } - } - assert(!verifyLLVMIR(M)); - }); -#endif - - auto Err = OptSelLayer.add(JD, std::move(TSM)); + TSM = selectOptLevel(std::move(TSM)); + TSM = (*Optimizers)(std::move(TSM)); + TSM = (*JITPointers)(std::move(TSM)); + auto Lock = TSM.getContext().getLock(); + Module &M = *TSM.getModuleUnlocked(); + // Treat this as if one of the passes might contain a 
safepoint + // even though that shouldn't be the case and might be unwise + Expected> Obj = CompileLayer.getCompiler()(M); + if (!Obj) { + ES.reportError(Obj.takeError()); + errs() << "Failed to add module to JIT!\n"; + errs() << "Dumping failing module\n" << M << "\n"; + return; + } + { auto release = std::move(Lock); } + auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj)); if (Err) { ES.reportError(std::move(Err)); - errs() << "Failed to add module to JIT!\n"; + errs() << "Failed to add objectfile to JIT!\n"; abort(); } -#ifndef JL_USE_JITLINK - // force eager compilation (for now), due to memory management specifics - // (can't handle compilation recursion) - auto Lookups = ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports); - if (!Lookups) { - ES.reportError(Lookups.takeError()); - errs() << "Failed to lookup symbols in module!\n"; - } -#endif } Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) { - if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error - { + if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error { if (M.getDataLayout().isDefault()) M.setDataLayout(DL); if (M.getDataLayout() != DL) @@ -1815,24 +2149,29 @@ Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, M.getDataLayout().getStringRepresentation() + " (module) vs " + DL.getStringRepresentation() + " (jit)", inconvertibleErrorCode()); - + // OrcJIT requires that all modules / files have unique names: + M.setModuleIdentifier((M.getModuleIdentifier() + Twine("-") + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str()); return Error::success(); - })) + })) return Err; + //if (ShouldOptimize) + // return OptimizeLayer.add(JD, std::move(TSM)); return CompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); } Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) { assert(Obj && "Can not add null object"); -#ifdef 
JL_USE_JITLINK + // OrcJIT requires that all modules / files have unique names: + // https://llvm.org/doxygen/namespacellvm_1_1orc.html#a1f5a1bc60c220cdccbab0f26b2a425e1 + // so we have to force a copy here + std::string Name = ("jitted-" + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str(); + Obj = Obj->getMemBufferCopy(Obj->getBuffer(), Name); return ObjectLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); -#else - return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); -#endif } SmallVector JuliaOJIT::findSymbols(ArrayRef Names) { + // assert(MemMgr.use_count() == 1); (true single-threaded, but slightly race-y to assert it with concurrent threads) DenseMap Unmangled; orc::SymbolLookupSet Exports; for (StringRef Name : Names) { @@ -1978,6 +2317,7 @@ void JuliaOJIT::enableJITDebuggingSupport() addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBAllocAction); auto registerJITLoaderGDBWrapper = addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBWrapper); cantFail(JD.define(orc::absoluteSymbols(GDBFunctions))); + (void)registerJITLoaderGDBWrapper; if (TM->getTargetTriple().isOSBinFormatMachO()) ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple()))); #ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794 @@ -2013,12 +2353,12 @@ void JuliaOJIT::enableOProfileJITEventListener() void JuliaOJIT::enablePerfJITEventListener() { #if JL_LLVM_VERSION >= 180000 - orc::SymbolMap PerfFunctions; - auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart); - auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd); - auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl); - cantFail(JD.define(orc::absoluteSymbols(PerfFunctions))); if (TM->getTargetTriple().isOSBinFormatELF()) { + orc::SymbolMap PerfFunctions; + auto StartAddr = 
addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart); + auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd); + auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl); + cantFail(JD.define(orc::absoluteSymbols(PerfFunctions))); ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create())); //ObjectLayer.addPlugin(cantFail(PerfSupportPlugin::Create( // ES.getExecutorProcessControl(), *JD, true, true))); @@ -2032,7 +2372,7 @@ void JuliaOJIT::enablePerfJITEventListener() void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) { if (L) - ObjectLayer.registerJITEventListener(*L); + UnlockedObjectLayer.registerJITEventListener(*L); } void JuliaOJIT::enableJITDebuggingSupport() { @@ -2071,7 +2411,7 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV) size_t JuliaOJIT::getTotalBytes() const { - auto bytes = jit_bytes_size.load(std::memory_order_relaxed); + auto bytes = jl_atomic_load_relaxed(&jit_bytes_size); #ifndef JL_USE_JITLINK size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT; bytes += getRTDyldMemoryManagerTotalBytes(MemMgr.get()); @@ -2081,7 +2421,7 @@ size_t JuliaOJIT::getTotalBytes() const void JuliaOJIT::addBytes(size_t bytes) { - jit_bytes_size.fetch_add(bytes, std::memory_order_relaxed); + jl_atomic_fetch_add_relaxed(&jit_bytes_size, bytes); } void JuliaOJIT::printTimers() @@ -2326,74 +2666,6 @@ static void jl_decorate_module(Module &M) { #undef ASM_USES_ELF } -#ifndef JL_USE_JITLINK -// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - SmallVectorImpl &Stack) -{ - // First check if the TSM is empty (already compiled) - if (!M) - return 0; - // Next check and record if it is on the stack somewhere - { - auto &Id = Queued[&M]; - if (Id) - return Id; - Stack.push_back(&M); - 
Id = Stack.size(); - } - // Finally work out the SCC - int depth = Stack.size(); - int MergeUp = depth; - SmallVector Children; - M.withModuleDo([&](Module &m) JL_NOTSAFEPOINT { - for (auto &F : m.global_objects()) { - if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Callee = NewExports.find(F.getName()); - if (Callee != NewExports.end()) { - auto *CM = Callee->second; - if (*CM && CM != &M) { - auto Down = Queued.find(CM); - if (Down != Queued.end()) - MergeUp = std::min(MergeUp, Down->second); - else - Children.push_back(CM); - } - } - } - } - }); - assert(MergeUp > 0); - for (auto *CM : Children) { - int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack); - assert(Down <= (int)Stack.size()); - if (Down) - MergeUp = std::min(MergeUp, Down); - } - if (MergeUp < depth) - return MergeUp; - while (1) { - // Not in a cycle (or at the top of it) - // remove SCC state and merge every CM from the cycle into M - orc::ThreadSafeModule *CM = Stack.back(); - auto it = Queued.find(CM); - assert(it->second == (int)Stack.size()); - Queued.erase(it); - Stack.pop_back(); - if ((int)Stack.size() < depth) { - assert(&M == CM); - break; - } - jl_merge_module(M, std::move(*CM)); - } - jl_ExecutionEngine->addModule(std::move(M)); - return 0; -} -#endif - // helper function for adding a DLLImport (dlsym) address to the execution engine void add_named_global(StringRef name, void *addr) { diff --git a/src/jitlayers.h b/src/jitlayers.h index f4b9a6ea5395a..ba4ac3081795e 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -69,7 +69,6 @@ using namespace llvm; extern "C" jl_cgparams_t jl_default_cgparams; -extern arraylist_t new_invokes; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef) @@ -154,11 +153,11 @@ struct jl_locked_stream { std::unique_lock lck; ios_t *&stream; - lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT + 
lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lck(mutex), stream(stream) {} lock(lock&) = delete; lock(lock&&) JL_NOTSAFEPOINT = default; - ~lock() JL_NOTSAFEPOINT = default; + ~lock() JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT = default; ios_t *&operator*() JL_NOTSAFEPOINT { return stream; @@ -177,8 +176,8 @@ struct jl_locked_stream { } }; - jl_locked_stream() JL_NOTSAFEPOINT = default; - ~jl_locked_stream() JL_NOTSAFEPOINT = default; + jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER = default; + ~jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; lock operator*() JL_NOTSAFEPOINT { return lock(mutex, stream); @@ -210,12 +209,12 @@ struct jl_codegen_call_target_t { jl_returninfo_t::CallingConv cc; unsigned return_roots; llvm::Function *decl; + llvm::Function *oc; bool specsig; }; typedef SmallVector, 0> jl_workqueue_t; -// TODO DenseMap? -typedef std::map> jl_compiled_functions_t; + typedef std::list> CallFrames; struct jl_codegen_params_t { orc::ThreadSafeContext tsctx; @@ -229,7 +228,6 @@ struct jl_codegen_params_t { typedef StringMap SymMapGV; // outputs jl_workqueue_t workqueue; - jl_compiled_functions_t compiled_functions; std::map global_targets; std::map, GlobalVariable*> external_fns; std::map ditypes; @@ -292,13 +290,20 @@ enum CompilationPolicy { Extern = 1, }; -void jl_compile_workqueue( - jl_codegen_params_t ¶ms, - CompilationPolicy policy); - Function *jl_cfunction_object(jl_function_t *f, jl_value_t *rt, jl_tupletype_t *argt, jl_codegen_params_t ¶ms); +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT; +void emit_specsig_to_fptr1( + Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, + jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, + size_t nargs, + jl_codegen_params_t ¶ms, + Function *target, + size_t min_world, size_t max_world) JL_NOTSAFEPOINT; +Function 
*get_or_emit_fptr1(StringRef Name, Module *M) JL_NOTSAFEPOINT; +void jl_init_function(Function *F, const Triple &TT) JL_NOTSAFEPOINT; + void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT @@ -371,6 +376,11 @@ using OptimizerResultT = Expected; using SharedBytesT = StringSet::MapEntryTy)>>; class JuliaOJIT { +private: + // any verification the user wants to do when adding an OwningResource to the pool + template + static void verifyResource(AnyT &resource) JL_NOTSAFEPOINT { } + static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { assert(context.getContext()); } public: #ifdef JL_USE_JITLINK typedef orc::ObjectLinkingLayer ObjLayerT; @@ -385,13 +395,13 @@ class JuliaOJIT { std::unique_ptr O) override { JL_TIMING(LLVM_JIT, JIT_Link); #ifndef JL_USE_JITLINK - std::lock_guard lock(EmissionMutex); + std::lock_guard lock(EmissionMutex); #endif BaseLayer.emit(std::move(R), std::move(O)); } private: orc::ObjectLayer &BaseLayer; - std::mutex EmissionMutex; + std::recursive_mutex EmissionMutex; }; #endif typedef orc::IRCompileLayer CompileLayerT; @@ -420,11 +430,16 @@ class JuliaOJIT { : pool(pool), resource(std::move(resource)) {} OwningResource(const OwningResource &) = delete; OwningResource &operator=(const OwningResource &) = delete; - OwningResource(OwningResource &&) JL_NOTSAFEPOINT = default; + OwningResource(OwningResource &&other) JL_NOTSAFEPOINT + : pool(other.pool), resource(std::move(other.resource)) { + other.resource.reset(); + } OwningResource &operator=(OwningResource &&) JL_NOTSAFEPOINT = default; ~OwningResource() JL_NOTSAFEPOINT { // _LEAVE - if (resource) + if (resource) { + verifyResource(*resource); pool.release(std::move(*resource)); + } } ResourceT release() JL_NOTSAFEPOINT { ResourceT res(std::move(*resource)); @@ -510,7 +525,11 @@ class JuliaOJIT { std::unique_ptr mutex; }; + typedef ResourcePool> ContextPoolT; + struct 
DLSymOptimizer; + struct OptimizerT; + struct JITPointersT; #ifndef JL_USE_JITLINK void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT; @@ -528,7 +547,7 @@ class JuliaOJIT { orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT; void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; - void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; //Methods for the C API Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, @@ -552,15 +571,7 @@ class JuliaOJIT { uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; - auto getContext() JL_NOTSAFEPOINT { - return *ContextPool; - } - orc::ThreadSafeContext acquireContext() { // JL_NOTSAFEPOINT_ENTER? - return ContextPool.acquire(); - } - void releaseContext(orc::ThreadSafeContext &&ctx) { // JL_NOTSAFEPOINT_LEAVE? 
- ContextPool.release(std::move(ctx)); - } + orc::ThreadSafeContext makeContext() JL_NOTSAFEPOINT; const DataLayout& getDataLayout() const JL_NOTSAFEPOINT; // TargetMachine pass-through methods @@ -576,22 +587,21 @@ class JuliaOJIT { void addBytes(size_t bytes) JL_NOTSAFEPOINT; void printTimers() JL_NOTSAFEPOINT; - jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_emitted_mi_name_stream; } - jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_compiles_stream; } - jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_llvm_opt_stream; } std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; - // Note that this is a safepoint due to jl_get_library_ and jl_dlsym calls - void optimizeDLSyms(Module &M); - - jl_mutex_t jitlock; + // Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls + // but may be called from inside safe-regions due to jit compilation locks + void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; private: @@ -618,20 +628,20 @@ class JuliaOJIT { std::mutex llvm_printing_mutex{}; SmallVector, 0> PrintLLVMTimers; - ResourcePool> ContextPool; - - std::atomic jit_bytes_size{0}; -#ifndef JL_USE_JITLINK - const std::shared_ptr MemMgr; -#else + _Atomic(size_t) jit_bytes_size{0}; + _Atomic(size_t) jitcounter{0}; +#ifdef JL_USE_JITLINK const std::unique_ptr MemMgr; -#endif ObjLayerT ObjectLayer; -#ifndef JL_USE_JITLINK - LockLayerT LockLayer; +#else + const std::shared_ptr MemMgr; // shared_ptr protected by LockLayerT.EmissionMutex + ObjLayerT UnlockedObjectLayer; + LockLayerT ObjectLayer; #endif 
CompileLayerT CompileLayer; + std::unique_ptr JITPointers; JITPointersLayerT JITPointersLayer; + std::unique_ptr Optimizers; OptimizeLayerT OptimizeLayer; OptSelLayerT OptSelLayer; }; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index f712f154ed896..71a78b1c20fc7 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -547,21 +547,6 @@ YY(jl_getUnwindInfo) \ YY(jl_get_libllvm) \ YY(jl_register_passbuilder_callbacks) \ - YY(LLVMExtraMPMAddCPUFeaturesPass) \ - YY(LLVMExtraMPMAddRemoveNIPass) \ - YY(LLVMExtraMPMAddMultiVersioningPass) \ - YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ - YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ - YY(LLVMExtraMPMAddLowerPTLSPass) \ - YY(LLVMExtraFPMAddDemoteFloat16Pass) \ - YY(LLVMExtraFPMAddLateLowerGCPass) \ - YY(LLVMExtraFPMAddAllocOptPass) \ - YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \ - YY(LLVMExtraFPMAddLowerExcHandlersPass) \ - YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ - YY(LLVMExtraFPMAddFinalLowerGCPass) \ - YY(LLVMExtraLPMAddJuliaLICMPass) \ - YY(LLVMExtraLPMAddLowerSIMDLoopPass) \ YY(JLJITGetLLVMOrcExecutionSession) \ YY(JLJITGetJuliaOJIT) \ YY(JLJITGetExternalJITDylib) \ diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index 463e39c41d00a..808af18ebfdbd 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -199,28 +199,6 @@ (error-wrap (lambda () (julia-expand-macroscope expr)))) -;; construct default definitions of `eval` for non-bare modules -;; called by jl_eval_module_expr -(define (module-default-defs name file line) - (jl-expand-to-thunk - (let* ((loc (if (and (eq? file 'none) (eq? line 0)) '() `((line ,line ,file)))) - (x (if (eq? name 'x) 'y 'x)) - (mex (if (eq? 
name 'mapexpr) 'map_expr 'mapexpr))) - `(block - (= (call eval ,x) - (block - ,@loc - (call (core eval) ,name ,x))) - (= (call include (:: ,x (top AbstractString))) - (block - ,@loc - (call (core _call_latest) (top include) ,name ,x))) - (= (call include (:: ,mex (top Function)) (:: ,x (top AbstractString))) - (block - ,@loc - (call (core _call_latest) (top include) ,mex ,name ,x))))) - file line)) - ; run whole frontend on a string. useful for testing. (define (fe str) (expand-toplevel-expr (julia-parse str) 'none 0)) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 4b3e6ae96898b..b48cb48bf0b79 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4854,10 +4854,14 @@ f(x) = yt(x) ;; separate trycatch and tryfinally blocks earlier. (mark-label catch) (if finally - (begin (enter-finally-block catchcode #f) ;; enter block via exception + (begin (set! finally-handler last-finally-handler) + (set! catch-token-stack (cons handler-token catch-token-stack)) + (compile (caddr e) break-labels #f #f) ;; enter block via exception + (emit '(call (top rethrow))) + (emit-return tail '(null)) ; unreachable + (set! catch-token-stack (cdr catch-token-stack)) (mark-label endl) ;; non-exceptional control flow enters here - (set! finally-handler last-finally-handler) - (compile (caddr e) break-labels #f #f) + (compile (renumber-assigned-ssavalues (caddr e)) break-labels #f #f) ;; emit actions to be taken at exit of finally ;; block, depending on the tag variable `finally` (let loop ((actions (caddr my-finally-handler))) diff --git a/src/julia.h b/src/julia.h index 7bb5f31eda708..1d36dba519700 100644 --- a/src/julia.h +++ b/src/julia.h @@ -426,8 +426,8 @@ typedef struct _jl_opaque_closure_t { jl_value_t *captures; size_t world; jl_method_t *source; - jl_fptr_args_t invoke; - void *specptr; + jl_fptr_args_t invoke; // n.b. 
despite the similar name, this is not an invoke ABI (jl_call_t / julia.call2), but rather the fptr1 (jl_fptr_args_t / julia.call) ABI + void *specptr; // n.b. despite the similarity in field name, this is not arbitrary private data for jlcall, but rather the codegen ABI for specsig, and is mandatory if specsig is valid } jl_opaque_closure_t; // This type represents an executable operation @@ -475,7 +475,7 @@ typedef struct _jl_code_instance_t { // & 0b100 == From image _Atomic(uint8_t) precompile; // if set, this will be added to the output system image uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key - _Atomic(jl_callptr_t) invoke; // jlcall entry point + _Atomic(jl_callptr_t) invoke; // jlcall entry point usually, but if this codeinst belongs to an OC Method, then this is an jl_fptr_args_t fptr1 instead, unless it is not, because it is a special token object instead union _jl_generic_specptr_t { _Atomic(void*) fptr; _Atomic(jl_fptr_args_t) fptr1; @@ -620,6 +620,7 @@ typedef struct _jl_weakref_t { jl_value_t *value; } jl_weakref_t; +// N.B: Needs to be synced with runtime_internals.jl enum jl_partition_kind { // Constant: This binding partition is a constant declared using `const` // ->restriction holds the constant value @@ -684,7 +685,7 @@ typedef struct __attribute__((aligned(8))) _jl_binding_partition_t { _Atomic(jl_ptr_kind_union_t) restriction; size_t min_world; _Atomic(size_t) max_world; - _Atomic(struct _jl_binding_partition_t*) next; + _Atomic(struct _jl_binding_partition_t *) next; size_t reserved; // Reserved for ->kind. 
Currently this holds the low bits of ->restriction during serialization } jl_binding_partition_t; @@ -1845,8 +1846,9 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_sym_t *jl_gensym(void); JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len); JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void); -JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved_and_const(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name); JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module); JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world, jl_code_instance_t **cache); @@ -2008,8 +2010,8 @@ JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_s JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs); JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs); JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED); -JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b 
JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind); JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from); JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s); JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname); @@ -2339,7 +2341,13 @@ JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_RO extern JL_DLLIMPORT int jl_task_gcstack_offset; extern JL_DLLIMPORT int jl_task_ptls_offset; +#ifdef __cplusplus +} +#endif #include "julia_locks.h" // requires jl_task_t definition +#ifdef __cplusplus +extern "C" { +#endif // Return the exception currently being handled, or `jl_nothing`. 
// diff --git a/src/julia_atomics.h b/src/julia_atomics.h index c094afcc54cd5..d05f0fafab28f 100644 --- a/src/julia_atomics.h +++ b/src/julia_atomics.h @@ -103,12 +103,12 @@ enum jl_memory_order { // this wrong thus we include the correct definitions here (with implicit // conversion), instead of using the macro version template -T jl_atomic_load(std::atomic *ptr) +T jl_atomic_load(const std::atomic *ptr) { return std::atomic_load(ptr); } template -T jl_atomic_load_explicit(std::atomic *ptr, std::memory_order order) +T jl_atomic_load_explicit(const std::atomic *ptr, std::memory_order order) { return std::atomic_load_explicit(ptr, order); } diff --git a/src/julia_internal.h b/src/julia_internal.h index 82c91c6d073af..8c4ee9fca36e0 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -372,6 +372,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -518,30 +520,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
- v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -910,13 +888,10 @@ EXTERN_INLINE_DECLARE enum jl_partition_kind decode_restriction_kind(jl_ptr_kind #endif } -STATIC_INLINE jl_value_t *decode_restriction_value(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT +STATIC_INLINE jl_value_t *decode_restriction_value(jl_ptr_kind_union_t JL_PROPAGATES_ROOT pku) JL_NOTSAFEPOINT { #ifdef _P64 jl_value_t *val = (jl_value_t*)(pku & ~0x7); - // This is a little bit of a lie at the moment - it is one of the things that - // can go wrong with binding replacement. 
- JL_GC_PROMISE_ROOTED(val); return val; #else return pku.val; @@ -950,14 +925,8 @@ STATIC_INLINE int jl_bkind_is_some_guard(enum jl_partition_kind kind) JL_NOTSAFE return kind == BINDING_KIND_FAILED || kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED; } -EXTERN_INLINE_DECLARE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT { - if (!b) - return NULL; - assert(jl_is_binding(b)); - return jl_atomic_load_relaxed(&b->partitions); -} - -JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world); +JL_DLLEXPORT jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b JL_PROPAGATES_ROOT, size_t world); +JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr JL_PROPAGATES_ROOT, size_t world); EXTERN_INLINE_DECLARE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT { return decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)); @@ -1715,13 +1684,14 @@ JL_DLLEXPORT int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTS JL_DLLEXPORT jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0); JL_DLLEXPORT jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0); +JL_DLLEXPORT jl_value_t *jl_argtype_without_function(jl_value_t *ftypes); JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type); -void register_eh_frames(uint8_t *Addr, size_t Size); -void deregister_eh_frames(uint8_t *Addr, size_t Size); +void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; +void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; -STATIC_INLINE void *jl_get_frame_addr(void) +STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT { #ifdef __GNUC__ return __builtin_frame_address(0); diff --git a/src/julia_locks.h b/src/julia_locks.h index 5774ddada60c6..4d1345177f965 100644 --- a/src/julia_locks.h +++ 
b/src/julia_locks.h @@ -103,6 +103,33 @@ JL_DLLEXPORT void jl_unlock_field(jl_mutex_t *v) JL_NOTSAFEPOINT; #ifdef __cplusplus } + +#include +#include +// simple C++ shim around a std::unique_lock + gc-safe + disabled finalizers region +// since we nearly always want that combination together +class jl_unique_gcsafe_lock { +public: + int8_t gc_state; + std::unique_lock native; + explicit jl_unique_gcsafe_lock(std::mutex &native) JL_NOTSAFEPOINT_ENTER + { + jl_task_t *ct = jl_current_task; + gc_state = jl_gc_safe_enter(ct->ptls); + this->native = std::unique_lock(native); + ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph + } + jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &&native) = delete; + jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &native) = delete; + ~jl_unique_gcsafe_lock() JL_NOTSAFEPOINT_LEAVE { + jl_task_t *ct = jl_current_task; + jl_gc_safe_leave(ct->ptls, gc_state); + ct->ptls->engine_nqueued--; // enable finalizers (but don't run them until the next gc) + } + void wait(std::condition_variable& cond) JL_NOTSAFEPOINT { + cond.wait(native); + } +}; #endif #endif diff --git a/src/julia_threads.h b/src/julia_threads.h index 17e8d7d466044..67da2978b4267 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -5,6 +5,7 @@ #define JL_THREADS_H #include "gc-tls.h" +#include "gc-tls-common.h" #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -155,6 +156,7 @@ typedef struct _jl_tls_states_t { // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 0ec88c9d56356..a9e1b1e02da42 100644 --- a/src/llvm-alloc-opt.cpp +++ 
b/src/llvm-alloc-opt.cpp @@ -646,14 +646,9 @@ void Optimizer::initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, return; assert(!buff->isArrayAllocation()); Type *T = buff->getAllocatedType(); - Value *Init = UndefValue::get(T); - if ((allockind & AllocFnKind::Zeroed) != AllocFnKind::Unknown) - Init = Constant::getNullValue(T); // zero, as described - else if (allockind == AllocFnKind::Unknown) - Init = Constant::getNullValue(T); // assume zeroed since we didn't find the attribute - else - Init = prolog_builder.CreateFreeze(UndefValue::get(T)); // assume freeze, since LLVM does not natively support this case - prolog_builder.CreateStore(Init, buff); + const DataLayout &DL = F.getParent()->getDataLayout(); + prolog_builder.CreateMemSet(buff, ConstantInt::get(Type::getInt8Ty(prolog_builder.getContext()), 0), DL.getTypeAllocSize(T), buff->getAlign()); + } // This function should not erase any safepoint so that the lifetime marker can find and cache diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc index c41ecbba87b6a..523c9fbcd3402 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -1,26 +1,26 @@ //Module passes #ifdef MODULE_PASS -MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) -MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) -MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) -MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) -MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) -MODULE_PASS("LowerPTLSPass", LowerPTLSPass, LowerPTLSPass()) +MODULE_PASS("CPUFeatures", CPUFeaturesPass()) +MODULE_PASS("RemoveNI", RemoveNIPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) #endif //Function passes #ifdef FUNCTION_PASS 
-FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass, DemoteFloat16Pass()) -FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass, LateLowerGCPass()) -FUNCTION_PASS("AllocOpt", AllocOptPass, AllocOptPass()) -FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass, PropagateJuliaAddrspacesPass()) -FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass, LowerExcHandlersPass()) -FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifierPass()) -FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) +FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass()) +FUNCTION_PASS("AllocOpt", AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) +FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass()) #endif //Loop passes #ifdef LOOP_PASS -LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) -LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) +LOOP_PASS("JuliaLICM", JuliaLICMPass()) +LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass()) #endif diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp index e98c375b711b3..8c48b5661f984 100644 --- a/src/llvm_api.cpp +++ b/src/llvm_api.cpp @@ -10,7 +10,6 @@ #endif #include "jitlayers.h" -#include "passes.h" #include #include @@ -58,14 +57,6 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility, LLVMOrcMaterializationResponsibilityRef) -typedef struct LLVMOpaqueModulePassManager *LLVMModulePassManagerRef; -typedef struct LLVMOpaqueFunctionPassManager *LLVMFunctionPassManagerRef; -typedef struct LLVMOpaqueLoopPassManager *LLVMLoopPassManagerRef; - -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::ModulePassManager, LLVMModulePassManagerRef) 
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::FunctionPassManager, LLVMFunctionPassManagerRef) -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::LoopPassManager, LLVMLoopPassManagerRef) - extern "C" { JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void) @@ -150,27 +141,4 @@ JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT) return wrap(&unwrap(JIT)->getIRCompileLayer()); } -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraMPMAdd##CLASS##_impl(LLVMModulePassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraFPMAdd##CLASS##_impl(LLVMFunctionPassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraLPMAdd##CLASS##_impl(LLVMLoopPassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } - -#include "llvm-julia-passes.inc" - -#undef MODULE_PASS -#undef CGSCC_PASS -#undef FUNCTION_PASS -#undef LOOP_PASS - } // extern "C" diff --git a/src/module.c b/src/module.c index 8dbac950235ee..9b4d26cc7b000 100644 --- a/src/module.c +++ b/src/module.c @@ -13,10 +13,52 @@ extern "C" { #endif // In this translation unit and this translation unit only emit this symbol `extern` for use by julia -EXTERN_INLINE_DEFINE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT; EXTERN_INLINE_DEFINE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT; extern inline enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT; +static jl_binding_partition_t *new_binding_partition(void) +{ + jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type); + jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD)); + bpart->min_world = 0; + 
jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1); + jl_atomic_store_relaxed(&bpart->next, NULL); +#ifdef _P64 + bpart->reserved = 0; +#endif + return bpart; +} + +jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) { + if (!b) + return NULL; + assert(jl_is_binding(b)); + jl_value_t *parent = (jl_value_t*)b; + _Atomic(jl_binding_partition_t *)*insert = &b->partitions; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(insert); + size_t max_world = (size_t)-1; + while (1) { + while (bpart && world < bpart->min_world) { + insert = &bpart->next; + max_world = bpart->min_world - 1; + parent = (jl_value_t *)bpart; + bpart = jl_atomic_load_relaxed(&bpart->next); + } + if (bpart && world <= jl_atomic_load_relaxed(&bpart->max_world)) + return bpart; + jl_binding_partition_t *new_bpart = new_binding_partition(); + jl_atomic_store_relaxed(&new_bpart->next, bpart); + jl_gc_wb_fresh(new_bpart, bpart); + if (bpart) + new_bpart->min_world = jl_atomic_load_relaxed(&bpart->max_world) + 1; + jl_atomic_store_relaxed(&new_bpart->max_world, max_world); + if (jl_atomic_cmpswap(insert, &bpart, new_bpart)) { + jl_gc_wb(parent, new_bpart); + return new_bpart; + } + } +} + JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world) { if (!gr) @@ -188,19 +230,6 @@ static jl_globalref_t *jl_new_globalref(jl_module_t *mod, jl_sym_t *name, jl_bin return g; } -static jl_binding_partition_t *new_binding_partition(void) -{ - jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type); - jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD)); - bpart->min_world = 0; - jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1); - jl_atomic_store_relaxed(&bpart->next, NULL); -#ifdef _P64 - bpart->reserved = 0; -#endif - return bpart; -} - static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t 
*name) { jl_task_t *ct = jl_current_task; @@ -215,9 +244,7 @@ static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name) JL_GC_PUSH1(&b); b->globalref = jl_new_globalref(mod, name, b); jl_gc_wb(b, b->globalref); - jl_binding_partition_t *bpart = new_binding_partition(); - jl_atomic_store_relaxed(&b->partitions, bpart); - jl_gc_wb(b, bpart); + jl_atomic_store_relaxed(&b->partitions, NULL); JL_GC_POP(); return b; } @@ -324,6 +351,32 @@ JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b) return decode_restriction_value(pku); } +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved_and_const(jl_binding_t *b) +{ + // Unlike jl_get_binding_value_if_const this doesn't try to allocate new binding partitions if they + // don't already exist, making this JL_NOTSAFEPOINT. + if (!b) + return NULL; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions); + if (!bpart) + return NULL; + size_t max_world = jl_atomic_load_relaxed(&bpart->max_world); + if (bpart->min_world > jl_current_task->world_age || jl_current_task->world_age > max_world) + return NULL; + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) + return NULL; + if (!jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return NULL; + return decode_restriction_value(pku); +} + +JL_DLLEXPORT jl_value_t *jl_bpart_get_restriction_value(jl_binding_partition_t *bpart) +{ + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + return decode_restriction_value(pku); +} + typedef struct _modstack_t { jl_module_t *m; jl_sym_t *var; @@ -947,6 +1000,28 @@ JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var jl_gc_wb(bpart, val); } +extern jl_mutex_t world_counter_lock; +JL_DLLEXPORT void jl_disable_binding(jl_globalref_t *gr) +{ + jl_binding_t *b = gr->binding; + b = jl_resolve_owner(b, gr->mod, gr->name, NULL); + jl_binding_partition_t *bpart = 
jl_get_binding_partition(b, jl_current_task->world_age); + + if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GUARD) { + // Already guard + return; + } + + JL_LOCK(&world_counter_lock); + jl_task_t *ct = jl_current_task; + size_t new_max_world = jl_atomic_load_acquire(&jl_world_counter); + // TODO: Trigger invalidation here + (void)ct; + jl_atomic_store_release(&bpart->max_world, new_max_world); + jl_atomic_store_release(&jl_world_counter, new_max_world + 1); + JL_UNLOCK(&world_counter_lock); +} + JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr) { jl_binding_t *b = gr->binding; @@ -1018,13 +1093,17 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b jl_value_t *jl_check_binding_wr(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, int reassign) { + JL_GC_PUSH1(&rhs); // callee-rooted jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku))); if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) { jl_value_t *old = decode_restriction_value(pku); - if (jl_egal(rhs, old)) + JL_GC_PROMISE_ROOTED(old); + if (jl_egal(rhs, old)) { + JL_GC_POP(); return NULL; + } if (jl_typeof(rhs) == jl_typeof(old)) jl_errorf("invalid redefinition of constant %s.%s. 
This redefinition may be permitted using the `const` keyword.", jl_symbol_name(mod->name), jl_symbol_name(var)); @@ -1033,13 +1112,13 @@ jl_value_t *jl_check_binding_wr(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t jl_symbol_name(mod->name), jl_symbol_name(var)); } jl_value_t *old_ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old_ty); if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) { - JL_GC_PUSH1(&rhs); // callee-rooted if (!jl_isa(rhs, old_ty)) jl_errorf("cannot assign an incompatible value to the global %s.%s.", jl_symbol_name(mod->name), jl_symbol_name(var)); - JL_GC_POP(); } + JL_GC_POP(); return old_ty; } @@ -1076,6 +1155,7 @@ JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl jl_errorf("invalid redefinition of constant %s.%s", jl_symbol_name(mod->name), jl_symbol_name(var)); jl_value_t *ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(ty); return modify_value(ty, &b->value, (jl_value_t*)b, op, rhs, 1, mod, var); } diff --git a/src/opaque_closure.c b/src/opaque_closure.c index 0bf3a729cbcb1..9fe36f32d2030 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -80,14 +80,16 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t if (!jl_subtype(rt_lb, selected_rt)) { // TODO: It would be better to try to get a specialization with the // correct rt check here (or we could codegen a wrapper). - specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + specptr = NULL; // this will force codegen of the unspecialized version + invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; jl_value_t *ts[2] = {rt_lb, (jl_value_t*)ci->rettype}; selected_rt = jl_type_union(ts, 2); } if (!jl_subtype(ci->rettype, rt_ub)) { // TODO: It would be better to try to get a specialization with the // correct rt check here (or we could codegen a wrapper). 
- specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + specptr = NULL; // this will force codegen of the unspecialized version + invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; selected_rt = jl_type_intersection(rt_ub, selected_rt); } @@ -108,8 +110,7 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, selected_rt); JL_GC_PROMISE_ROOTED(oc_type); - if (!specptr) { - sigtype = jl_argtype_with_function_type((jl_value_t*)oc_type, (jl_value_t*)argt); + if (specptr == NULL) { jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec); // OC wrapper methods are not world dependent @@ -197,7 +198,7 @@ int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_CALLABLE(jl_f_opaque_closure_call) { - jl_opaque_closure_t* oc = (jl_opaque_closure_t*)F; + jl_opaque_closure_t *oc = (jl_opaque_closure_t*)F; jl_value_t *argt = jl_tparam0(jl_typeof(oc)); if (!jl_tupletype_length_compat(argt, nargs)) jl_method_error(F, args, nargs + 1, oc->world); diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 09d51598ea8b7..f8976099ee53c 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -617,29 +617,29 @@ namespace { void adjustPIC(PassInstrumentationCallbacks &PIC) JL_NOTSAFEPOINT { //Borrowed from LLVM PassBuilder.cpp:386 -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ +#define MODULE_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define MODULE_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ +#define FUNCTION_PASS(NAME, CREATE_PASS) \ 
PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ +#define LOOP_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ +#define CGSCC_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); @@ -899,7 +899,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef FUNCTION_PASS if (Name.consume_front("GCInvariantVerifier")) { @@ -921,7 +921,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &PM, 
ArrayRef InnerPipeline) { -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef MODULE_PASS if (Name.consume_front("LowerPTLSPass")) { @@ -964,7 +964,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, LoopPassManager &PM, ArrayRef InnerPipeline) { -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define LOOP_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef LOOP_PASS return false; @@ -980,3 +980,9 @@ extern "C" JL_DLLEXPORT_CODEGEN ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT { return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks}; } + +void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) +{ + PM->add(new TargetLibraryInfoWrapperPass(triple)); + PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); +} diff --git a/src/precompile_utils.c b/src/precompile_utils.c index a78d1e66dbb51..fc361d8b88e6f 100644 --- a/src/precompile_utils.c +++ b/src/precompile_utils.c @@ -312,10 +312,12 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met } } } - n = jl_array_nrows(new_ext_cis); - for (i = 0; i < n; i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i); - precompile_enq_specialization_(ci->def, m); + if (new_ext_cis) { + n = jl_array_nrows(new_ext_cis); + for (i = 0; i < n; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i); + precompile_enq_specialization_(ci->def, m); + } } void *native_code = jl_precompile_(m, 1); JL_GC_POP(); diff --git a/src/rtutils.c b/src/rtutils.c index 
85a9be5e0b1da..faa087dcb077d 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -661,7 +661,7 @@ static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL; if (globname && dv->name->module) { jl_binding_t *b = jl_get_module_binding(dv->name->module, globname, 0); - jl_value_t *bv = jl_get_binding_value_if_const(b); + jl_value_t *bv = jl_get_binding_value_if_resolved_and_const(b); // The `||` makes this function work for both function instances and function types. if (bv && (bv == v || jl_typeof(bv) == v)) return 1; diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6784e601bcfba..770daa8bf17a6 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -642,13 +642,13 @@ void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT for (i = 0; i < n; i++) { jl_frame_t frame = frames[i]; if (!frame.func_name) { - jl_safe_printf("unknown function (ip: %p)\n", (void*)ip); + jl_safe_printf("unknown function (ip: %p) at %s\n", (void*)ip, frame.file_name ? 
frame.file_name : "(unknown file)"); } else { jl_safe_print_codeloc(frame.func_name, frame.file_name, frame.line, frame.inlined); free(frame.func_name); - free(frame.file_name); } + free(frame.file_name); } free(frames); } @@ -1340,18 +1340,14 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { continue; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); int t_state = JL_TASK_STATE_DONE; jl_task_t *t = ptls2->root_task; diff --git a/src/staticdata.c b/src/staticdata.c index af4527cbc143f..af3477a25128e 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3883,9 +3883,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl if ((jl_value_t*)b == jl_nothing) continue; jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions); - jl_atomic_store_relaxed(&bpart->restriction, - encode_restriction((jl_value_t*)jl_atomic_load_relaxed(&bpart->restriction), bpart->reserved)); - bpart->reserved = 0; + while (bpart) { + jl_atomic_store_relaxed(&bpart->restriction, + encode_restriction((jl_value_t*)jl_atomic_load_relaxed(&bpart->restriction), bpart->reserved)); + bpart->reserved = 0; + bpart = jl_atomic_load_relaxed(&bpart->next); + } } #endif } diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 
5f1095fec9168..9a7653972ea7c 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -605,15 +605,15 @@ static void write_mod_list(ios_t *s, jl_array_t *a) write_int32(s, 0); } -// OPT_LEVEL should always be the upper bits #define OPT_LEVEL 6 +#define DEBUG_LEVEL 1 JL_DLLEXPORT uint8_t jl_cache_flags(void) { // OOICCDDP uint8_t flags = 0; flags |= (jl_options.use_pkgimages & 1); // 0-bit - flags |= (jl_options.debug_level & 3) << 1; // 1-2 bit + flags |= (jl_options.debug_level & 3) << DEBUG_LEVEL; // 1-2 bit flags |= (jl_options.check_bounds & 3) << 3; // 3-4 bit flags |= (jl_options.can_inline & 1) << 5; // 5-bit flags |= (jl_options.opt_level & 3) << OPT_LEVEL; // 6-7 bit @@ -636,14 +636,13 @@ JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t requested_flags, uint8_t actua actual_flags &= ~1; } - // 2. Check all flags, except opt level must be exact - uint8_t mask = (1 << OPT_LEVEL)-1; + // 2. Check all flags, except opt level and debug level must be exact + uint8_t mask = (~(3u << OPT_LEVEL) & ~(3u << DEBUG_LEVEL)) & 0x7f; if ((actual_flags & mask) != (requested_flags & mask)) return 0; - // 3. allow for higher optimization flags in cache - actual_flags >>= OPT_LEVEL; - requested_flags >>= OPT_LEVEL; - return actual_flags >= requested_flags; + // 3. 
allow for higher optimization and debug level flags in cache to minimize required compile option combinations + return ((actual_flags >> OPT_LEVEL) & 3) >= ((requested_flags >> OPT_LEVEL) & 3) && + ((actual_flags >> DEBUG_LEVEL) & 3) >= ((requested_flags >> DEBUG_LEVEL) & 3); } JL_DLLEXPORT uint8_t jl_match_cache_flags_current(uint8_t flags) diff --git a/src/toplevel.c b/src/toplevel.c index 8caa8b086ec00..c2fbc38d067eb 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -206,11 +206,17 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex if (std_imports) { if (jl_base_module != NULL) { jl_add_standard_imports(newm); + jl_datatype_t *include_into = (jl_datatype_t *)jl_get_global(jl_base_module, jl_symbol("IncludeInto")); + if (include_into) { + form = jl_new_struct(include_into, newm); + jl_set_const(newm, jl_symbol("include"), form); + } + } + jl_datatype_t *eval_into = (jl_datatype_t *)jl_get_global(jl_core_module, jl_symbol("EvalInto")); + if (eval_into) { + form = jl_new_struct(eval_into, newm); + jl_set_const(newm, jl_symbol("eval"), form); } - // add `eval` function - form = jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno); - jl_toplevel_eval_flex(newm, form, 0, 1, &filename, &lineno); - form = NULL; } newm->file = jl_symbol(filename); @@ -318,6 +324,7 @@ void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_va jl_symbol_name(mod->name), jl_symbol_name(sym)); } jl_value_t *old_ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old_ty); if (!jl_types_equal(ty, old_ty)) { jl_errorf("cannot set type for global %s.%s. 
It already has a value or is already set to a different type.", jl_symbol_name(mod->name), jl_symbol_name(sym)); @@ -738,6 +745,7 @@ JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b, j if (!val) return bpart; jl_value_t *old = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old); if (jl_egal(val, old)) break; if (!did_warn) { diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl index 211687df47954..e338d8626fb0f 100644 --- a/stdlib/InteractiveUtils/src/macros.jl +++ b/stdlib/InteractiveUtils/src/macros.jl @@ -4,6 +4,10 @@ import Base: typesof, insert!, replace_ref_begin_end!, infer_effects +# defined in Base so it's possible to time all imports, including InteractiveUtils and its deps +# via. `Base.@time_imports` etc. +import Base: @time_imports, @trace_compile, @trace_dispatch + separate_kwargs(args...; kwargs...) = (args, values(kwargs)) """ @@ -245,39 +249,6 @@ macro code_lowered(ex0...) end end -macro time_imports(ex) - quote - try - Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1) - $(esc(ex)) - finally - Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1) - end - end -end - -macro trace_compile(ex) - quote - try - ccall(:jl_force_trace_compile_timing_enable, Cvoid, ()) - $(esc(ex)) - finally - ccall(:jl_force_trace_compile_timing_disable, Cvoid, ()) - end - end -end - -macro trace_dispatch(ex) - quote - try - ccall(:jl_force_trace_dispatch_enable, Cvoid, ()) - $(esc(ex)) - finally - ccall(:jl_force_trace_dispatch_disable, Cvoid, ()) - end - end -end - """ @functionloc diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index a34df37153cd2..b38a983296065 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -118,17 +118,6 @@ Bidiagonal(A::Bidiagonal) = A Bidiagonal{T}(A::Bidiagonal{T}) where {T} = A Bidiagonal{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A.dv, A.ev, A.uplo) -function 
diagzero(A::Bidiagonal{<:AbstractMatrix}, i, j) - Tel = eltype(A) - if i < j && A.uplo == 'U' #= top right zeros =# - return zeroslike(Tel, axes(A.ev[i], 1), axes(A.ev[j-1], 2)) - elseif j < i && A.uplo == 'L' #= bottom left zeros =# - return zeroslike(Tel, axes(A.ev[i-1], 1), axes(A.ev[j], 2)) - else - return zeroslike(Tel, axes(A.dv[i], 1), axes(A.dv[j], 2)) - end -end - _offdiagind(uplo) = uplo == 'U' ? 1 : -1 @inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index aacc5479bfa9d..d8f2513f5bfc8 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -110,6 +110,7 @@ norm2(x::Union{Array{T},StridedVector{T}}) where {T<:BlasFloat} = # Conservative assessment of types that have zero(T) defined for themselves haszero(::Type) = false haszero(::Type{T}) where {T<:Number} = isconcretetype(T) +haszero(::Type{Union{Missing,T}}) where {T<:Number} = haszero(T) @propagate_inbounds _zero(M::AbstractArray{T}, inds...) where {T} = haszero(T) ? zero(T) : zero(M[inds...]) """ diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 17ff232f5b262..417bcfa5715b1 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -191,8 +191,9 @@ end Return the appropriate zero element `A[i, j]` corresponding to a banded matrix `A`. """ diagzero(A::AbstractMatrix, i, j) = zero(eltype(A)) -diagzero(D::Diagonal{M}, i, j) where {M<:AbstractMatrix} = - zeroslike(M, axes(D.diag[i], 1), axes(D.diag[j], 2)) +diagzero(A::AbstractMatrix{M}, i, j) where {M<:AbstractMatrix} = + zeroslike(M, axes(A[i,i], 1), axes(A[j,j], 2)) +diagzero(A::AbstractMatrix, inds...) = diagzero(A, to_indices(A, inds)...) 
# dispatching on the axes permits specializing on the axis types to return something other than an Array zeroslike(M::Type, ax::Vararg{Union{AbstractUnitRange, Integer}}) = zeroslike(M, ax) """ @@ -700,16 +701,16 @@ end zerofilled = true end end - @inbounds for i = 1:nA, j = 1:nB + for i in eachindex(valA), j in eachindex(valB) idx = (i-1)*nB+j - C[idx, idx] = valA[i] * valB[j] + @inbounds C[idx, idx] = valA[i] * valB[j] end if !zerofilled - for j in 1:nA, i in 1:mA + for j in axes(A,2), i in axes(A,1) Δrow, Δcol = (i-1)*mB, (j-1)*nB - for k in 1:nB, l in 1:mB + for k in axes(B,2), l in axes(B,1) i == j && k == l && continue - C[Δrow + l, Δcol + k] = A[i,j] * B[l,k] + @inbounds C[Δrow + l, Δcol + k] = A[i,j] * B[l,k] end end end @@ -749,24 +750,24 @@ end end end m = 1 - @inbounds for j = 1:nA - A_jj = A[j,j] - for k = 1:nB - for l = 1:mB - C[m] = A_jj * B[l,k] + for j in axes(A,2) + A_jj = @inbounds A[j,j] + for k in axes(B,2) + for l in axes(B,1) + @inbounds C[m] = A_jj * B[l,k] m += 1 end m += (nA - 1) * mB end if !zerofilled # populate the zero elements - for i in 1:mA + for i in axes(A,1) i == j && continue - A_ij = A[i, j] + A_ij = @inbounds A[i, j] Δrow, Δcol = (i-1)*mB, (j-1)*nB - for k in 1:nB, l in 1:nA - B_lk = B[l, k] - C[Δrow + l, Δcol + k] = A_ij * B_lk + for k in axes(B,2), l in axes(B,1) + B_lk = @inbounds B[l, k] + @inbounds C[Δrow + l, Δcol + k] = A_ij * B_lk end end end @@ -792,23 +793,23 @@ end end end m = 1 - @inbounds for j = 1:nA - for l = 1:mB - Bll = B[l,l] - for i = 1:mA - C[m] = A[i,j] * Bll + for j in axes(A,2) + for l in axes(B,1) + Bll = @inbounds B[l,l] + for i in axes(A,1) + @inbounds C[m] = A[i,j] * Bll m += nB end m += 1 end if !zerofilled - for i in 1:mA - A_ij = A[i, j] + for i in axes(A,1) + A_ij = @inbounds A[i, j] Δrow, Δcol = (i-1)*mB, (j-1)*nB - for k in 1:nB, l in 1:mB + for k in axes(B,2), l in axes(B,1) l == k && continue - B_lk = B[l, k] - C[Δrow + l, Δcol + k] = A_ij * B_lk + B_lk = @inbounds B[l, k] + @inbounds C[Δrow + 
l, Δcol + k] = A_ij * B_lk end end end diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl index e5f23b4981616..6c65c49add74b 100644 --- a/stdlib/LinearAlgebra/src/generic.jl +++ b/stdlib/LinearAlgebra/src/generic.jl @@ -389,55 +389,7 @@ function cross(a::AbstractVector, b::AbstractVector) end """ - triu(M) - -Upper triangle of a matrix. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> triu(a) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 0.0 1.0 1.0 1.0 - 0.0 0.0 1.0 1.0 - 0.0 0.0 0.0 1.0 -``` -""" -triu(M::AbstractMatrix) = triu!(copymutable(M)) - -""" - tril(M) - -Lower triangle of a matrix. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> tril(a) -4×4 Matrix{Float64}: - 1.0 0.0 0.0 0.0 - 1.0 1.0 0.0 0.0 - 1.0 1.0 1.0 0.0 - 1.0 1.0 1.0 1.0 -``` -""" -tril(M::AbstractMatrix) = tril!(copymutable(M)) - -""" - triu(M, k::Integer) + triu(M, k::Integer = 0) Return the upper triangle of `M` starting from the `k`th superdiagonal. @@ -465,10 +417,22 @@ julia> triu(a,-3) 1.0 1.0 1.0 1.0 ``` """ -triu(M::AbstractMatrix,k::Integer) = triu!(copymutable(M),k) +function triu(M::AbstractMatrix, k::Integer = 0) + d = similar(M) + A = triu!(d,k) + if iszero(k) + copytrito!(A, M, 'U') + else + for col in axes(A,2) + rows = firstindex(A,1):min(col-k, lastindex(A,1)) + A[rows, col] = @view M[rows, col] + end + end + return A +end """ - tril(M, k::Integer) + tril(M, k::Integer = 0) Return the lower triangle of `M` starting from the `k`th superdiagonal. 
@@ -496,7 +460,19 @@ julia> tril(a,-3) 1.0 0.0 0.0 0.0 ``` """ -tril(M::AbstractMatrix,k::Integer) = tril!(copymutable(M),k) +function tril(M::AbstractMatrix,k::Integer=0) + d = similar(M) + A = tril!(d,k) + if iszero(k) + copytrito!(A, M, 'L') + else + for col in axes(A,2) + rows = max(firstindex(A,1),col-k):lastindex(A,1) + A[rows, col] = @view M[rows, col] + end + end + return A +end """ triu!(M) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index e17eb80d25453..265995d9e7806 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -307,7 +307,9 @@ function applytri(f, A::HermOrSym, B::HermOrSym) f(uppertriangular(_conjugation(A)(A.data)), uppertriangular(B.data)) end end -parentof_applytri(f, args...) = applytri(parent ∘ f, args...) +_parent_tri(U::UpperOrLowerTriangular) = parent(U) +_parent_tri(U) = U +parentof_applytri(f, args...) = _parent_tri(applytri(f, args...)) isdiag(A::HermOrSym) = applytri(isdiag, A) diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl index 71660bc5ca28c..d6994f4b4dd58 100644 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ b/stdlib/LinearAlgebra/src/triangular.jl @@ -142,6 +142,7 @@ UnitUpperTriangular const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTriangular{T,S}} const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}} const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}} +const UnitUpperOrUnitLowerTriangular{T,S} = Union{UnitUpperTriangular{T,S}, UnitLowerTriangular{T,S}} uppertriangular(M) = UpperTriangular(M) lowertriangular(M) = LowerTriangular(M) @@ -181,6 +182,16 @@ copy(A::UpperOrLowerTriangular{<:Any, <:StridedMaybeAdjOrTransMat}) = copyto!(si # then handle all methods that requires specific handling of upper/lower and unit diagonal +function 
full(A::Union{UpperTriangular,LowerTriangular}) + return _triangularize(A)(parent(A)) +end +function full(A::UnitUpperOrUnitLowerTriangular) + isupper = A isa UnitUpperTriangular + Ap = _triangularize(A)(parent(A), isupper ? 1 : -1) + Ap[diagind(Ap, IndexStyle(Ap))] = @view A[diagind(A, IndexStyle(A))] + return Ap +end + function full!(A::LowerTriangular) B = A.data tril!(B) @@ -222,7 +233,7 @@ Base.isstored(A::UpperOrLowerTriangular, i::Int, j::Int) = @propagate_inbounds getindex(A::Union{UnitLowerTriangular{T}, UnitUpperTriangular{T}}, i::Int, j::Int) where {T} = _shouldforwardindex(A, i, j) ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) @propagate_inbounds getindex(A::Union{LowerTriangular, UpperTriangular}, i::Int, j::Int) = - _shouldforwardindex(A, i, j) ? A.data[i,j] : _zero(A.data,j,i) + _shouldforwardindex(A, i, j) ? A.data[i,j] : diagzero(A,i,j) _shouldforwardindex(U::UpperTriangular, b::BandIndex) = b.band >= 0 _shouldforwardindex(U::LowerTriangular, b::BandIndex) = b.band <= 0 @@ -234,7 +245,7 @@ Base.@constprop :aggressive @propagate_inbounds function getindex(A::Union{UnitL _shouldforwardindex(A, b) ? A.data[b] : ifelse(b.band == 0, oneunit(T), zero(T)) end Base.@constprop :aggressive @propagate_inbounds function getindex(A::Union{LowerTriangular, UpperTriangular}, b::BandIndex) - _shouldforwardindex(A, b) ? A.data[b] : _zero(A.data, b) + _shouldforwardindex(A, b) ? A.data[b] : diagzero(A.data, b) end _zero_triangular_half_str(::Type{<:UpperOrUnitUpperTriangular}) = "lower" @@ -571,6 +582,8 @@ end return A end +_triangularize(::UpperOrUnitUpperTriangular) = triu +_triangularize(::LowerOrUnitLowerTriangular) = tril _triangularize!(::UpperOrUnitUpperTriangular) = triu! _triangularize!(::LowerOrUnitLowerTriangular) = tril! 
@@ -880,7 +893,8 @@ function +(A::UnitLowerTriangular, B::UnitLowerTriangular) (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I) end -+(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) + copyto!(similar(parent(B)), B) ++(A::UpperOrLowerTriangular, B::UpperOrLowerTriangular) = full(A) + full(B) ++(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A), size(A)), A) + copyto!(similar(parent(B), size(B)), B) function -(A::UpperTriangular, B::UpperTriangular) (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B @@ -914,7 +928,8 @@ function -(A::UnitLowerTriangular, B::UnitLowerTriangular) (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B LowerTriangular(tril(A.data, -1) - tril(B.data, -1)) end --(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B) +-(A::UpperOrLowerTriangular, B::UpperOrLowerTriangular) = full(A) - full(B) +-(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A), size(A)), A) - copyto!(similar(parent(B), size(B)), B) function kron(A::UpperTriangular{T,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{S,<:StridedMaybeAdjOrTransMat}) where {T,S} C = UpperTriangular(Matrix{promote_op(*, T, S)}(undef, _kronsize(A, B))) diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index 8b56ee15e56e3..1c3a9dfa676ac 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -353,7 +353,7 @@ Random.seed!(1) D3 = Diagonal(convert(Vector{elty}, rand(n÷2))) DM3= Matrix(D3) @test Matrix(kron(D, D3)) ≈ kron(DM, DM3) - M4 = rand(elty, n÷2, n÷2) + M4 = rand(elty, size(D3,1) + 1, size(D3,2) + 2) # choose a different size from D3 @test kron(D3, M4) ≈ kron(DM3, M4) @test kron(M4, D3) ≈ kron(M4, DM3) X = [ones(1,1) for i in 1:2, j in 1:2] @@ 
-1392,7 +1392,7 @@ end end @testset "zeros in kron with block matrices" begin - D = Diagonal(1:2) + D = Diagonal(1:4) B = reshape([ones(2,2), ones(3,2), ones(2,3), ones(3,3)], 2, 2) @test kron(D, B) == kron(Array(D), B) @test kron(B, D) == kron(B, Array(D)) diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl index e0a1704913f78..2bf9c75141700 100644 --- a/stdlib/LinearAlgebra/test/generic.jl +++ b/stdlib/LinearAlgebra/test/generic.jl @@ -18,6 +18,9 @@ using .Main.DualNumbers isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) using .Main.FillArrays +isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl")) +using .Main.SizedArrays + Random.seed!(123) n = 5 # should be odd @@ -725,4 +728,56 @@ end @test det(A) == det(M) end +@testset "tril/triu" begin + @testset "with partly initialized matrices" begin + function test_triu(M, k=nothing) + M[1,1] = M[2,2] = M[1,2] = M[1,3] = M[2,3] = 3 + if isnothing(k) + MU = triu(M) + else + MU = triu(M, k) + end + @test iszero(MU[2,1]) + @test MU[1,1] == MU[2,2] == MU[1,2] == MU[1,3] == MU[2,3] == 3 + end + test_triu(Matrix{BigInt}(undef, 2, 3)) + test_triu(Matrix{BigInt}(undef, 2, 3), 0) + test_triu(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3))) + test_triu(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3)), 0) + + function test_tril(M, k=nothing) + M[1,1] = M[2,2] = M[2,1] = 3 + if isnothing(k) + ML = tril(M) + else + ML = tril(M, k) + end + @test ML[1,2] == ML[1,3] == ML[2,3] == 0 + @test ML[1,1] == ML[2,2] == ML[2,1] == 3 + end + test_tril(Matrix{BigInt}(undef, 2, 3)) + test_tril(Matrix{BigInt}(undef, 2, 3), 0) + test_tril(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3))) + test_tril(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3)), 0) + end + + @testset "block arrays" begin + for nrows in 0:3, ncols in 0:3 + M = [randn(2,2) for _ in 
1:nrows, _ in 1:ncols] + Mu = triu(M) + for col in axes(M,2) + rowcutoff = min(col, size(M,1)) + @test @views Mu[1:rowcutoff, col] == M[1:rowcutoff, col] + @test @views Mu[rowcutoff+1:end, col] == zero.(M[rowcutoff+1:end, col]) + end + Ml = tril(M) + for col in axes(M,2) + @test @views Ml[col:end, col] == M[col:end, col] + rowcutoff = min(col-1, size(M,1)) + @test @views Ml[1:rowcutoff, col] == zero.(M[1:rowcutoff, col]) + end + end + end +end + end # module TestGeneric diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl index 7a51ab9d454af..3aef23617b942 100644 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ b/stdlib/LinearAlgebra/test/symmetric.jl @@ -1160,4 +1160,11 @@ end @test symT-s == Array(symT) - Array(s) end +@testset "issue #56283" begin + a = 1.0 + D = Diagonal(randn(10)) + H = Hermitian(D*D') + @test a*H == H +end + end # module TestSymmetric diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl index ec9a3079e2643..678827ceac720 100644 --- a/stdlib/LinearAlgebra/test/triangular.jl +++ b/stdlib/LinearAlgebra/test/triangular.jl @@ -1284,6 +1284,14 @@ end @test istril(U, k) == istril(A, k) end end + + @testset "Union eltype" begin + M = Matrix{Union{Int,Missing}}(missing,2,2) + U = triu(M) + @test iszero(U[2,1]) + U = tril(M) + @test iszero(U[1,2]) + end end @testset "indexing with a BandIndex" begin @@ -1322,4 +1330,60 @@ end end end +@testset "indexing uses diagzero" begin + @testset "block matrix" begin + M = reshape([zeros(2,2), zeros(4,2), zeros(2,3), zeros(4,3)],2,2) + U = UpperTriangular(M) + @test [size(x) for x in U] == [size(x) for x in M] + end + @testset "Union eltype" begin + M = Matrix{Union{Int,Missing}}(missing,4,4) + U = UpperTriangular(M) + @test iszero(U[3,1]) + end +end + +@testset "addition/subtraction of mixed triangular" begin + for A in (Hermitian(rand(4, 4)), Diagonal(rand(5))) + for T in (UpperTriangular, LowerTriangular, + UnitUpperTriangular, 
UnitLowerTriangular) + B = T(A) + M = Matrix(B) + R = B - B' + if A isa Diagonal + @test R isa Diagonal + end + @test R == M - M' + R = B + B' + if A isa Diagonal + @test R isa Diagonal + end + @test R == M + M' + C = MyTriangular(B) + @test C - C' == M - M' + @test C + C' == M + M' + end + end + @testset "unfilled parent" begin + @testset for T in (UpperTriangular, LowerTriangular, + UnitUpperTriangular, UnitLowerTriangular) + F = Matrix{BigFloat}(undef, 2, 2) + B = T(F) + isupper = B isa Union{UpperTriangular, UnitUpperTriangular} + B[1+!isupper, 1+isupper] = 2 + if !(B isa Union{UnitUpperTriangular, UnitLowerTriangular}) + B[1,1] = B[2,2] = 3 + end + M = Matrix(B) + @test B - B' == M - M' + @test B + B' == M + M' + @test B - copy(B') == M - M' + @test B + copy(B') == M + M' + C = MyTriangular(B) + @test C - C' == M - M' + @test C + C' == M + M' + end + end +end + end # module TestTriangular diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md index 17d4e71328ac4..a2bfd499e4586 100644 --- a/stdlib/Logging/docs/src/index.md +++ b/stdlib/Logging/docs/src/index.md @@ -191,10 +191,10 @@ module. Loading julia with `JULIA_DEBUG=loading` will activate ``` $ JULIA_DEBUG=loading julia -e 'using OhMyREPL' -┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an invalid cache header +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an incompatible cache header └ @ Base loading.jl:1328 [ Info: Recompiling stale cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji for module OhMyREPL -┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an invalid cache header +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an incompatible cache header └ @ Base loading.jl:1328 ... 
``` diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 470acefbc6c83..c29c83fce4046 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 27c1b1ee5cf15571eb5e54707e812d646ac1dde3 +PKG_SHA1 = 799dc2d54c4e809b9779de8c604564a5b3befaa0 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 diff --git a/stdlib/REPL/src/Pkg_beforeload.jl b/stdlib/REPL/src/Pkg_beforeload.jl index 472fbc924668d..e110910bafc2f 100644 --- a/stdlib/REPL/src/Pkg_beforeload.jl +++ b/stdlib/REPL/src/Pkg_beforeload.jl @@ -1,17 +1,16 @@ ## Pkg stuff needed before Pkg has loaded const Pkg_pkgid = Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg") -const Pkg_REPLExt_pkgid = Base.PkgId(Base.UUID("ceef7b17-42e7-5b1c-81d4-4cc4a2494ccf"), "REPLExt") function load_pkg() + REPLExt = Base.require_stdlib(Pkg_pkgid, "REPLExt") @lock Base.require_lock begin - REPLExt = Base.require_stdlib(Pkg_pkgid, "REPLExt") # require_stdlib does not guarantee that the `__init__` of the package is done when loading is done async # but we need to wait for the repl mode to be set up - lock = get(Base.package_locks, Pkg_REPLExt_pkgid.uuid, nothing) + lock = get(Base.package_locks, Base.PkgId(REPLExt), nothing) lock !== nothing && wait(lock[2]) - return REPLExt end + return REPLExt end ## Below here copied/tweaked from Pkg Types.jl so that the dummy Pkg prompt diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 88458f7de4666..ac791327e2d75 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -525,7 +525,7 @@ function Base.unsafe_write(limiter::LimitIO, p::Ptr{UInt8}, nb::UInt) end # We won't hit the limit so we'll write the full `nb` bytes - bytes_written = Base.unsafe_write(limiter.io, p, nb) + bytes_written = Base.unsafe_write(limiter.io, p, nb)::Union{Int,UInt} limiter.n += bytes_written return bytes_written end @@ -1431,6 +1431,7 @@ function 
setup_interface( end else edit_insert(s, ';') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, '?' => function (s::MIState,o...) @@ -1441,6 +1442,7 @@ function setup_interface( end else edit_insert(s, '?') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, ']' => function (s::MIState,o...) @@ -1477,6 +1479,7 @@ function setup_interface( Base.errormonitor(t_replswitch) else edit_insert(s, ']') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 5e80e17036559..d230b7b5fd232 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -480,6 +480,7 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') i = firstindex(r) braces = in_comment = 0 in_single_quotes = in_double_quotes = in_back_ticks = false + num_single_quotes_in_string = count('\'', s) while i <= ncodeunits(r) c, i = iterate(r, i) if c == '#' && i <= ncodeunits(r) && iterate(r, i)[1] == '=' @@ -502,7 +503,9 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') braces += 1 elseif c == c_end braces -= 1 - elseif c == '\'' + elseif c == '\'' && num_single_quotes_in_string % 2 == 0 + # ' can be a transpose too, so check if there are even number of 's in the string + # TODO: This probably needs to be more robust in_single_quotes = true elseif c == '"' in_double_quotes = true @@ -932,17 +935,11 @@ function get_import_mode(s::String) return nothing end -function close_path_completion(dir, paths, str, pos) - length(paths) == 1 || return false # Only close if there's a single choice... - path = (paths[1]::PathCompletion).path +function close_path_completion(dir, path, str, pos) path = unescape_string(replace(path, "\\\$"=>"\$")) path = joinpath(dir, path) # ...except if it's a directory... 
- try - isdir(path) - catch e - e isa Base.IOError || rethrow() # `path` cannot be determined to be a file - end && return false + Base.isaccessibledir(path) && return false # ...and except if there's already a " at the cursor. return lastindex(str) <= pos || str[nextind(str, pos)] != '"' end @@ -1197,7 +1194,9 @@ function complete_identifiers!(suggestions::Vector{Completion}, if !isinfix # Handle infix call argument completion of the form bar + foo(qux). frange, end_of_identifier = find_start_brace(@view s[1:prevind(s, end)]) - isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == prefix.args[end] + if !isempty(frange) # if find_start_brace fails to find the brace just continue + isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == prefix.args[end] + end end if isinfix prefix = prefix.args[end] @@ -1220,33 +1219,35 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif partial = string[1:pos] inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false)) - # ?(x, y)TAB lists methods you can call with these objects - # ?(x, y TAB lists methods that take these objects as the first two arguments - # MyModule.?(x, y)TAB restricts the search to names in MyModule - rexm = match(r"(\w+\.|)\?\((.*)$", partial) - if rexm !== nothing - # Get the module scope - if isempty(rexm.captures[1]) - callee_module = context_module - else - modname = Symbol(rexm.captures[1][1:end-1]) - if isdefined(context_module, modname) - callee_module = getfield(context_module, modname) - if !isa(callee_module, Module) + if !hint # require a tab press for completion of these + # ?(x, y)TAB lists methods you can call with these objects + # ?(x, y TAB lists methods that take these objects as the first two arguments + # MyModule.?(x, y)TAB restricts the search to names in MyModule + rexm = match(r"(\w+\.|)\?\((.*)$", partial) + if rexm !== nothing + # Get the module scope + if isempty(rexm.captures[1]) + 
callee_module = context_module + else + modname = Symbol(rexm.captures[1][1:end-1]) + if isdefined(context_module, modname) + callee_module = getfield(context_module, modname) + if !isa(callee_module, Module) + callee_module = context_module + end + else callee_module = context_module end - else - callee_module = context_module end - end - moreargs = !endswith(rexm.captures[2], ')') - callstr = "_(" * rexm.captures[2] - if moreargs - callstr *= ')' - end - ex_org = Meta.parse(callstr, raise=false, depwarn=false) - if isa(ex_org, Expr) - return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false + moreargs = !endswith(rexm.captures[2], ')') + callstr = "_(" * rexm.captures[2] + if moreargs + callstr *= ')' + end + ex_org = Meta.parse(callstr, raise=false, depwarn=false) + if isa(ex_org, Expr) + return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false + end end end @@ -1351,10 +1352,12 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif if !isnothing(path) paths, dir, success = complete_path(path::String, string_escape=true) - if close_path_completion(dir, paths, path, pos) - p = (paths[1]::PathCompletion).path * "\"" + if length(paths) == 1 + p = (paths[1]::PathCompletion).path hint && was_expanded && (p = contractuser(p)) - paths[1] = PathCompletion(p) + if close_path_completion(dir, p, path, pos) + paths[1] = PathCompletion(p * "\"") + end end if success && !isempty(dir) diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index 777017b12a6cf..7e0f5f8686f58 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -340,6 +340,12 @@ end # inexistent completion inside a cmd @test_nocompletion("run(`lol") +# issue 55856: copy(A'). 
errors in the REPL +let + c, r = test_complete("copy(A').") + @test isempty(c) +end + # test latex symbol completions let s = "\\alpha" c, r = test_bslashcomplete(s) @@ -1230,7 +1236,7 @@ let current_dir, forbidden e isa Base.IOError && occursin("ELOOP", e.msg) end c, r = test_complete("\"$(escape_string(path))/selfsym") - @test c == ["selfsymlink"] + @test c == ["selfsymlink\""] end end @@ -1351,6 +1357,31 @@ let (c, r, res) = test_complete("\"~/ka8w5rsz") c, r, res = test_complete("\"foo~bar") @test !res end +if !Sys.iswindows() + # create a dir and file temporarily in the home directory + path = mkpath(joinpath(homedir(), "Zx6Wa0GkC0")) + touch(joinpath(path, "my_file")) + try + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC") + @test res + @test c == String["Zx6Wa0GkC0/"] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0") + @test res + @test c == String[homedir() * "/Zx6Wa0GkC0"] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_") + @test res + @test c == String["my_file\""] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_file") + @test res + @test c == String[homedir() * "/Zx6Wa0GkC0/my_file"] + end + finally + rm(path, recursive=true) + end +end # Test the completion returns nothing when the folder do not exist let (c, r) = test_complete("cd(\"folder_do_not_exist_77/file") diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 46bc2d8790cec..cf906591b9962 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -812,7 +812,11 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype) if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc) extype = extype.error # deprecated end - if isa(exc, typeof(extype)) + # Support `UndefVarError(:x)` meaning `UndefVarError(:x, scope)` for any `scope`. + # Retains the behaviour from pre-v1.11 when `UndefVarError` didn't have `scope`. 
+ if isa(extype, UndefVarError) && !isdefined(extype, :scope) + success = exc isa UndefVarError && exc.var == extype.var + elseif isa(exc, typeof(extype)) success = true for fld in 1:nfields(extype) if !isequal(getfield(extype, fld), getfield(exc, fld)) diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl index 3ddcd7d5de0fd..0c08f78ef356f 100644 --- a/stdlib/Test/test/runtests.jl +++ b/stdlib/Test/test/runtests.jl @@ -1736,3 +1736,20 @@ end This is deprecated and may error in the future.""" @test_deprecated msg2 @macroexpand @testset DefaultTestSet DefaultTestSet begin end end + +# Issue #54082 +module M54082 end +@testset "@test_throws UndefVarError(:var)" begin + # Single-arg `UndefVarError` should match all `UndefVarError` for the + # same variable name, regardless of scope, to keep pre-v1.11 behaviour. + f54082() = var + @test_throws UndefVarError(:var) f54082() + # But if scope is set, then it has to match. + @test_throws UndefVarError(:var, M54082) M54082.var + let result = @testset NoThrowTestSet begin + # Wrong module scope + @test_throws UndefVarError(:var, Main) M54082.var + end + @test only(result) isa Test.Fail + end +end diff --git a/test/char.jl b/test/char.jl index 3100add0e81c5..5523125529031 100644 --- a/test/char.jl +++ b/test/char.jl @@ -288,6 +288,10 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c) @test string(ASCIIChar('x')) == "x" @test length(ASCIIChar('x')) == 1 @test !isempty(ASCIIChar('x')) + @test ndims(ASCIIChar('x')) == 0 + @test ndims(ASCIIChar) == 0 + @test firstindex(ASCIIChar('x')) == 1 + @test lastindex(ASCIIChar('x')) == 1 @test eltype(ASCIIChar) == ASCIIChar @test_throws MethodError write(IOBuffer(), ASCIIChar('x')) @test_throws MethodError read(IOBuffer('x'), ASCIIChar) diff --git a/test/choosetests.jl b/test/choosetests.jl index 96d230d185c71..affdee412bd86 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -29,7 +29,7 @@ const TESTNAMES = [ "channels", "iostream", "secretbuffer", 
"specificity", "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", "smallarrayshrink", "opaque_closure", "filesystem", "download", - "scopedvalues", "compileall" + "scopedvalues", "compileall", "rebinding" ] const INTERNET_REQUIRED_LIST = [ diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 71f9da04baa4a..dd62e329962c6 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -6055,3 +6055,11 @@ f55916(::Vararg{T,T}) where {T} = "2" g55916(x) = f55916(x) # this shouldn't error @test only(code_typed(g55916, (Any,); optimize=false))[2] == Int + +# JuliaLang/julia#56248 +@test Base.infer_return_type() do + TypeVar(:Issue56248, 1) +end === Union{} +@test Base.infer_return_type() do + TypeVar(:Issue56248, Any, 1) +end === Union{} diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 740ac5f4958e4..13ef05db2f23a 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -1967,3 +1967,32 @@ let f = (x)->nothing, mi = Base.method_instance(f, (Base.RefValue{Nothing},)), c ir = Core.Compiler.sroa_pass!(ir, inlining) Core.Compiler.verify_ir(ir) end + +let code = Any[ + # block 1 + GotoNode(4), # skip + # block 2 + Expr(:leave, SSAValue(1)), # not domsorted - make sure we move it correctly + # block 3 + ReturnNode(2), + # block 4 + EnterNode(7), + # block 5 + GotoIfNot(Argument(1), 2), + # block 6 + Expr(:leave, SSAValue(1)), + # block 7 + ReturnNode(1), + # block 8 + ReturnNode(nothing), + ] + ir = make_ircode(code; ssavaluetypes=Any[Any, Any, Union{}, Any, Any, Any, Union{}, Union{}]) + @test length(ir.cfg.blocks) == 8 + Core.Compiler.verify_ir(ir) + + # The IR should remain valid after domsorting + # (esp. 
including the insertion of new BasicBlocks for any fix-ups) + domtree = Core.Compiler.construct_domtree(ir) + ir = Core.Compiler.domsort_ssa!(ir, domtree) + Core.Compiler.verify_ir(ir) +end diff --git a/test/docs.jl b/test/docs.jl index 92d45fe05e397..8db9db30b8463 100644 --- a/test/docs.jl +++ b/test/docs.jl @@ -101,7 +101,7 @@ end @test Docs.undocumented_names(_ModuleWithUndocumentedNames) == [Symbol("@foo"), :f, :⨳] @test isempty(Docs.undocumented_names(_ModuleWithSomeDocumentedNames)) -@test Docs.undocumented_names(_ModuleWithSomeDocumentedNames; private=true) == [:eval, :g, :include] +@test Docs.undocumented_names(_ModuleWithSomeDocumentedNames; private=true) == [:g] # issue #11548 diff --git a/test/exceptions.jl b/test/exceptions.jl index eb0bbaec35090..1e52c7a2fe2c3 100644 --- a/test/exceptions.jl +++ b/test/exceptions.jl @@ -241,6 +241,18 @@ end end end)() @test length(Base.current_exceptions()) == 0 + + (()-> begin + while true + try + error("foo") + finally + break + end + end + @test length(Base.current_exceptions()) == 0 + end)() + @test length(Base.current_exceptions()) == 0 end @testset "Deep exception stacks" begin diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index b962157120456..665687e86835d 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -76,7 +76,7 @@ L3: ; preds = %L2, %L1, %0 ; CHECK-LABEL: @legal_int_types ; CHECK: alloca [12 x i8] ; CHECK-NOT: alloca i96 -; CHECK: store [12 x i8] zeroinitializer, +; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %var1, ; CHECK: ret void define void @legal_int_types() { %pgcstack = call ptr @julia.get_pgcstack() @@ -140,7 +140,7 @@ L2: ; preds = %0 ; CHECK: alloca ; CHECK-NOT: call token(...) 
@llvm.julia.gc_preserve_begin ; CHECK: call void @llvm.lifetime.start -; CHECK: store [8 x i8] zeroinitializer, +; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %v, ; CHECK-NOT: call void @llvm.lifetime.end define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret({}) %0) { %pgcstack = call ptr @julia.get_pgcstack() @@ -164,11 +164,8 @@ define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret ; CHECK: alloca [1 x i8] ; CHECK-DAG: alloca [2 x i8] ; CHECK-DAG: alloca [3 x i8] -; CHECK-DAG: freeze [1 x i8] undef -; CHECK-DAG: store [1 x i8] % -; CHECK-DAG: store [3 x i8] zeroinitializer, -; CHECK-NOT: store -; CHECK-NOT: zeroinitializer +; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 1 %var1, +; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 4 %var7, ; CHECK: ret void define void @initializers() { %pgcstack = call ptr @julia.get_pgcstack() diff --git a/test/loading.jl b/test/loading.jl index 9e7e40ff3b50a..ecba64ca45a73 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1129,25 +1129,6 @@ end run(cmd_proj_ext) end - # Sysimage extensions - # The test below requires that LinearAlgebra is in the sysimage and that it has not been loaded yet. - # if it gets moved out, this test will need to be updated. 
- # We run this test in a new process so we are not vulnerable to a previous test having loaded LinearAlgebra - sysimg_ext_test_code = """ - uuid_key = Base.PkgId(Base.UUID("37e2e46d-f89d-539d-b4ee-838fcccc9c8e"), "LinearAlgebra") - Base.in_sysimage(uuid_key) || error("LinearAlgebra not in sysimage") - haskey(Base.explicit_loaded_modules, uuid_key) && error("LinearAlgebra already loaded") - using HasExtensions - Base.get_extension(HasExtensions, :LinearAlgebraExt) === nothing || error("unexpectedly got an extension") - using LinearAlgebra - haskey(Base.explicit_loaded_modules, uuid_key) || error("LinearAlgebra not loaded") - Base.get_extension(HasExtensions, :LinearAlgebraExt) isa Module || error("expected extension to load") - """ - cmd = `$(Base.julia_cmd()) --startup-file=no -e $sysimg_ext_test_code` - cmd = addenv(cmd, "JULIA_LOAD_PATH" => join([proj, "@stdlib"], sep)) - run(cmd) - - # Extensions in implicit environments old_load_path = copy(LOAD_PATH) try @@ -1225,10 +1206,7 @@ end @test cf.check_bounds == 3 @test cf.inline @test cf.opt_level == 3 - - io = PipeBuffer() - show(io, cf) - @test read(io, String) == "use_pkgimages = true, debug_level = 3, check_bounds = 3, inline = true, opt_level = 3" + @test repr(cf) == "CacheFlags(; use_pkgimages=true, debug_level=3, check_bounds=3, inline=true, opt_level=3)" end empty!(Base.DEPOT_PATH) @@ -1420,13 +1398,16 @@ end "JULIA_DEPOT_PATH" => depot_path, "JULIA_DEBUG" => "loading") - out = Pipe() - proc = run(pipeline(cmd, stdout=out, stderr=out)) - close(out.in) - - log = @async String(read(out)) - @test success(proc) - fetch(log) + out = Base.PipeEndpoint() + log = @async read(out, String) + try + proc = run(pipeline(cmd, stdout=out, stderr=out)) + @test success(proc) + catch + @show fetch(log) + rethrow() + end + return fetch(log) end log = load_package("Parent", `--compiled-modules=no --pkgimages=no`) diff --git a/test/numbers.jl b/test/numbers.jl index fc3dc2c06bb7c..dc4f2cb613d77 100644 --- a/test/numbers.jl +++ 
b/test/numbers.jl @@ -2937,6 +2937,14 @@ end @test log(π,ComplexF32(2)) isa ComplexF32 end +@testset "irrational promotion shouldn't recurse without bound, issue #51001" begin + for s ∈ (:π, :ℯ) + T = Irrational{s} + @test promote_type(Complex{T}, T) <: Complex + @test promote_type(T, Complex{T}) <: Complex + end +end + @testset "printing non finite floats" begin let float_types = Set() allsubtypes!(Base, AbstractFloat, float_types) diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml index 5706aba59d1e0..f659a59e0910b 100644 --- a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml +++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml @@ -25,17 +25,12 @@ deps = ["ExtDep3"] path = "../HasExtensions.jl" uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" version = "0.1.0" +weakdeps = ["ExtDep", "ExtDep2"] [deps.HasExtensions.extensions] Extension = "ExtDep" ExtensionDep = "ExtDep3" ExtensionFolder = ["ExtDep", "ExtDep2"] - LinearAlgebraExt = "LinearAlgebra" - - [deps.HasExtensions.weakdeps] - ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" - ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" - LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.SomePackage]] path = "../SomePackage" diff --git a/test/project/Extensions/HasExtensions.jl/Project.toml b/test/project/Extensions/HasExtensions.jl/Project.toml index fe21a1423f543..a02f5662d602d 100644 --- a/test/project/Extensions/HasExtensions.jl/Project.toml +++ b/test/project/Extensions/HasExtensions.jl/Project.toml @@ -8,10 +8,8 @@ ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1" [weakdeps] ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [extensions] Extension = "ExtDep" ExtensionDep = "ExtDep3" ExtensionFolder = ["ExtDep", "ExtDep2"] -LinearAlgebraExt = "LinearAlgebra" diff --git 
a/test/project/Extensions/HasExtensions.jl/ext/LinearAlgebraExt.jl b/test/project/Extensions/HasExtensions.jl/ext/LinearAlgebraExt.jl deleted file mode 100644 index 19f87cb849417..0000000000000 --- a/test/project/Extensions/HasExtensions.jl/ext/LinearAlgebraExt.jl +++ /dev/null @@ -1,3 +0,0 @@ -module LinearAlgebraExt - -end diff --git a/test/read.jl b/test/read.jl index 34224c146864e..99903d92d270f 100644 --- a/test/read.jl +++ b/test/read.jl @@ -268,13 +268,27 @@ for (name, f) in l n2 = readbytes!(s2, a2) @test n1 == n2 @test length(a1) == length(a2) - @test a1[1:n1] == a2[1:n2] + let l = min(l, n) + @test a1[1:l] == a2[1:l] + end @test n <= length(text) || eof(s1) @test n <= length(text) || eof(s2) cleanup() end + # Test growing output array + let x = UInt8[], + io = io() + n = readbytes!(io, x) + @test n == 0 + @test isempty(x) + n = readbytes!(io, x, typemax(Int)) + @test n == length(x) + @test x == codeunits(text) + cleanup() + end + verbose && println("$name read!...") l = length(text) for n = [1, 2, l-2, l-1, l] @@ -477,12 +491,6 @@ let s = "qwerty" @test read(IOBuffer(s)) == codeunits(s) @test read(IOBuffer(s), 10) == codeunits(s) @test read(IOBuffer(s), 1) == codeunits(s)[1:1] - - # Test growing output array - x = UInt8[] - n = readbytes!(IOBuffer(s), x, 10) - @test x == codeunits(s) - @test n == length(x) end diff --git a/test/rebinding.jl b/test/rebinding.jl new file mode 100644 index 0000000000000..564be70e44913 --- /dev/null +++ b/test/rebinding.jl @@ -0,0 +1,24 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +module Rebinding + using Test + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_GUARD + struct Foo + x::Int + end + const defined_world_age = Base.tls_world_age() + x = Foo(1) + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_CONST + @test !contains(repr(x), "@world") + Base.delete_binding(@__MODULE__, :Foo) + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_GUARD + @test contains(repr(x), "@world") + + # Tests for @world syntax + @test Base.@world(Foo, defined_world_age) == typeof(x) + @test Base.@world(Rebinding.Foo, defined_world_age) == typeof(x) + @test Base.@world((@__MODULE__).Foo, defined_world_age) == typeof(x) +end diff --git a/test/reflection.jl b/test/reflection.jl index 634390e0680d1..8c701acb9c09d 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -179,7 +179,7 @@ let @test Base.binding_module(TestMod7648.TestModSub9475, :b9475) == TestMod7648.TestModSub9475 defaultset = Set(Symbol[:Foo7648, :TestMod7648, :a9475, :c7648, :f9475, :foo7648, :foo7648_nomethods]) allset = defaultset ∪ Set(Symbol[ - Symbol("#eval"), Symbol("#foo7648"), Symbol("#foo7648_nomethods"), Symbol("#include"), + Symbol("#foo7648"), Symbol("#foo7648_nomethods"), :TestModSub9475, :d7648, :eval, :f7648, :include]) imported = Set(Symbol[:convert, :curmod_name, :curmod]) usings_from_Test = Set(Symbol[ @@ -265,7 +265,7 @@ let defaultset = Set((:A,)) imported = Set((:M2,)) usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module itself usings = Set((:A, :f, :C, :y, :M1, :m1_x)) ∪ usings_from_Base - allset = Set((:A, :B, :C, :eval, :include, Symbol("#eval"), Symbol("#include"))) + allset = Set((:A, :B, :C, :eval, :include)) @test Set(names(TestMod54609.A)) == defaultset @test Set(names(TestMod54609.A, imported=true)) == defaultset ∪ imported @test Set(names(TestMod54609.A, usings=true)) == defaultset ∪ usings