diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd7f1c89420d6..9a3fe2cd441b3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -278,8 +278,8 @@ Be sure to change the UUID value back before making the pull request. The process of [creating a patch release](https://docs.julialang.org/en/v1/devdocs/build/distributing/#Point-releasing-101) is roughly as follows: -1. Create a new branch (e.g. `backports-release-1.6`) against the relevant minor release - branch (e.g. `release-1.6`). Usually a corresponding pull request is created as well. +1. Create a new branch (e.g. `backports-release-1.10`) against the relevant minor release + branch (e.g. `release-1.10`). Usually a corresponding pull request is created as well. 2. Add commits, nominally from `master` (hence "backports"), to that branch. See below for more information on this process. @@ -291,8 +291,8 @@ The process of [creating a patch release](https://docs.julialang.org/en/v1/devdo the pull request associated with the backports branch. Fix any issues. 4. Once all test and benchmark reports look good, merge the backports branch into - the corresponding release branch (e.g. merge `backports-release-1.6` into - `release-1.6`). + the corresponding release branch (e.g. merge `backports-release-1.10` into + `release-1.10`). 5. Open a pull request that bumps the version of the relevant minor release to the next patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37718). diff --git a/Compiler/extras/CompilerDevTools/Manifest.toml b/Compiler/extras/CompilerDevTools/Manifest.toml new file mode 100644 index 0000000000000..bcc78f1ded34a --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Manifest.toml @@ -0,0 +1,15 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "84f495a1bf065c95f732a48af36dd0cd2cefb9d5" + +[[deps.Compiler]] +path = "../.." +uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" +version = "0.0.2" + +[[deps.CompilerDevTools]] +path = "." +uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" +version = "0.0.0" diff --git a/Compiler/extras/CompilerDevTools/Project.toml b/Compiler/extras/CompilerDevTools/Project.toml new file mode 100644 index 0000000000000..a2749a9a56a84 --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Project.toml @@ -0,0 +1,5 @@ +name = "CompilerDevTools" +uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" + +[deps] +Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl new file mode 100644 index 0000000000000..5d0df5ccaa4e4 --- /dev/null +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -0,0 +1,48 @@ +module CompilerDevTools + +using Compiler +using Core.IR + +struct SplitCacheOwner; end +struct SplitCacheInterp <: Compiler.AbstractInterpreter + world::UInt + inf_params::Compiler.InferenceParams + opt_params::Compiler.OptimizationParams + inf_cache::Vector{Compiler.InferenceResult} + function SplitCacheInterp(; + world::UInt = Base.get_world_counter(), + inf_params::Compiler.InferenceParams = Compiler.InferenceParams(), + opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(), + inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[]) + new(world, inf_params, opt_params, inf_cache) + end +end + +Compiler.InferenceParams(interp::SplitCacheInterp) = interp.inf_params +Compiler.OptimizationParams(interp::SplitCacheInterp) = interp.opt_params +Compiler.get_inference_world(interp::SplitCacheInterp) = interp.world +Compiler.get_inference_cache(interp::SplitCacheInterp) = interp.inf_cache +Compiler.cache_owner(::SplitCacheInterp) = SplitCacheOwner() + +import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge +@eval @noinline typeinf(::SplitCacheOwner, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) + +@eval @noinline function typeinf_edge(::SplitCacheOwner, mi::MethodInstance, parent_frame::Compiler.InferenceState, world::UInt, source_mode::UInt8) + # TODO: This isn't quite right, we're just sketching things for now + interp = SplitCacheInterp(; world) + Compiler.typeinf_edge(interp, mi.def, mi.specTypes, Core.svec(), parent_frame, false, false) +end + +function with_new_compiler(f, args...) + mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance} + world = Base.tls_world_age() + new_compiler_ci = Core.OptimizedGenerics.CompilerPlugins.typeinf( + SplitCacheOwner(), mi, Compiler.SOURCE_MODE_ABI + ) + invoke(f, new_compiler_ci, args...) +end + +export with_new_compiler + +end diff --git a/Compiler/src/Compiler.jl b/Compiler/src/Compiler.jl index 2cf7e5508196c..8dd15462ed998 100644 --- a/Compiler/src/Compiler.jl +++ b/Compiler/src/Compiler.jl @@ -181,12 +181,26 @@ include("bootstrap.jl") include("reflection_interface.jl") include("opaque_closure.jl") +macro __SOURCE_FILE__() + __source__.file === nothing && return nothing + return QuoteNode(__source__.file::Symbol) +end + module IRShow end +function load_irshow!() + if isdefined(Base, :end_base_include) + # This code path is exclusively for Revise, which may want to re-run this + # after bootstrap. + include(IRShow, Base.joinpath(Base.dirname(Base.String(@__SOURCE_FILE__)), "ssair/show.jl")) + else + include(IRShow, "ssair/show.jl") + end +end if !isdefined(Base, :end_base_include) # During bootstrap, skip including this file and defer it to base/show.jl to include later else # When this module is loaded as the standard library, include this file as usual - include(IRShow, "ssair/show.jl") + load_irshow!() end end # baremodule Compiler diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl index 5946adf80ad52..2efd8a2b7264f 100644 --- a/Compiler/src/abstractinterpretation.jl +++ b/Compiler/src/abstractinterpretation.jl @@ -2218,16 +2218,47 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt ft = widenconst(ft′) ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) types = argtype_by_index(argtypes, 3) - if types isa Const && types.val isa Method - method = types.val::Method - types = method # argument value - lookupsig = method.sig # edge kind - argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) - nargtype = typeintersect(lookupsig, argtype) - nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) - nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + if types isa Const && types.val isa Union{Method, CodeInstance} + method_or_ci = types.val + if isa(method_or_ci, CodeInstance) + our_world = sv.world.this + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + specsig = method_or_ci.def.specTypes + defdef = method_or_ci.def.def + exct = method_or_ci.exctype + if !hasintersect(argtype, specsig) + return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + elseif !(argtype <: specsig) || (isa(defdef, Method) && !(argtype <: defdef.sig)) + exct = Union{exct, TypeError} + end + callee_valid_range = WorldRange(method_or_ci.min_world, method_or_ci.max_world) + if !(our_world in callee_valid_range) + if our_world < first(callee_valid_range) + update_valid_age!(sv, WorldRange(first(sv.world.valid_worlds), first(callee_valid_range)-1)) + else + update_valid_age!(sv, WorldRange(last(callee_valid_range)+1, last(sv.world.valid_worlds))) + end + return Future(CallMeta(Bottom, ErrorException, EFFECTS_THROWS, NoCallInfo())) + end + # TODO: When we add curing, we may want to assume this is nothrow + if (method_or_ci.owner === Nothing && method_ir_ci.def.def isa Method) + exct = Union{exct, ErrorException} + end + update_valid_age!(sv, callee_valid_range) + return Future(CallMeta(method_or_ci.rettype, exct, Effects(decode_effects(method_or_ci.ipo_purity_bits), nothrow=(exct===Bottom)), + InvokeCICallInfo(method_or_ci))) + else + method = method_or_ci::Method + types = method # argument value + lookupsig = method.sig # edge kind + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + nargtype = typeintersect(lookupsig, argtype) + nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + # Fall through to generic invoke handling + end else - widenconst(types) >: Method && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + hasintersect(widenconst(types), Union{Method, CodeInstance}) && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false) isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) unwrapped = unwrap_unionall(types) @@ -4020,13 +4051,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextr end effects === nothing || merge_override_effects!(interp, effects, frame) if lhs !== nothing && rt !== Bottom - if isa(lhs, SlotNumber) - changes = StateUpdate(lhs, VarState(rt, false)) - elseif isa(lhs, GlobalRef) - handle_global_assignment!(interp, frame, currsaw_latestworld, lhs, rt) - else - merge_effects!(interp, frame, EFFECTS_UNKNOWN) - end + changes = StateUpdate(lhs::SlotNumber, VarState(rt, false)) end end if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW) diff --git a/Compiler/src/abstractlattice.jl b/Compiler/src/abstractlattice.jl index c1f3050739170..7a9cff8918175 100644 --- a/Compiler/src/abstractlattice.jl +++ b/Compiler/src/abstractlattice.jl @@ -229,7 +229,7 @@ end if isa(t, Const) # don't consider mutable values useful constants val = t.val - return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || !ismutable(val) + return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || isa(val, CodeInstance) || !ismutable(val) end isa(t, PartialTypeVar) && return false # this isn't forwardable return is_const_prop_profitable_arg(widenlattice(𝕃), t) diff --git a/Compiler/src/bootstrap.jl b/Compiler/src/bootstrap.jl index 7ee439cc7ac67..475c53e317152 100644 --- a/Compiler/src/bootstrap.jl +++ b/Compiler/src/bootstrap.jl @@ -5,7 +5,15 @@ # especially try to make sure any recursive and leaf functions have concrete signatures, # since we won't be able to specialize & infer them at runtime -activate_codegen!() = ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) +function activate_codegen!() + ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) + Core.eval(Compiler, quote + let typeinf_world_age = Base.tls_world_age() + @eval Core.OptimizedGenerics.CompilerPlugins.typeinf(::Nothing, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world($(Expr(:$, :typeinf_world_age)), typeinf_ext_toplevel, mi, Base.tls_world_age(), source_mode) + end + end) +end function bootstrap!() let time() = ccall(:jl_clock_now, Float64, ()) diff --git a/Compiler/src/inferencestate.jl b/Compiler/src/inferencestate.jl index 9eb929b725fbf..6988e74310fc5 100644 --- a/Compiler/src/inferencestate.jl +++ b/Compiler/src/inferencestate.jl @@ -267,6 +267,7 @@ mutable struct InferenceState bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet bb_saw_latestworld::Vector{Bool} ssavaluetypes::Vector{Any} + ssaflags::Vector{UInt32} edges::Vector{Any} stmt_info::Vector{CallInfo} @@ -343,6 +344,7 @@ mutable struct InferenceState bb_vartable1[i] = VarState(argtyp, i > nargtypes) end src.ssavaluetypes = ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ] + ssaflags = copy(src.ssaflags) unreachable = BitSet() pclimitations = IdSet{InferenceState}() @@ -374,7 +376,7 @@ mutable struct InferenceState this = new( mi, WorldWithRange(world, valid_worlds), mod, sptypes, slottypes, src, cfg, spec_info, - currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, edges, stmt_info, + currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, ssaflags, edges, stmt_info, tasks, pclimitations, limitations, cycle_backedges, callstack, parentid, frameid, cycleid, result, unreachable, bestguess, exc_bestguess, ipo_effects, restrict_abstract_call_sites, cache_mode, insert_coverage, @@ -1004,25 +1006,22 @@ function callers_in_cycle(sv::InferenceState) end callers_in_cycle(sv::IRInterpretationState) = AbsIntCycle(sv.callstack::Vector{AbsIntState}, 0, 0) -get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc] +get_curr_ssaflag(sv::InferenceState) = sv.ssaflags[sv.currpc] get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag] -has_curr_ssaflag(sv::InferenceState, flag::UInt32) = has_flag(sv.src.ssaflags[sv.currpc], flag) +has_curr_ssaflag(sv::InferenceState, flag::UInt32) = has_flag(sv.ssaflags[sv.currpc], flag) has_curr_ssaflag(sv::IRInterpretationState, flag::UInt32) = has_flag(sv.ir.stmts[sv.curridx][:flag], flag) function set_curr_ssaflag!(sv::InferenceState, flag::UInt32, mask::UInt32=typemax(UInt32)) - curr_flag = sv.src.ssaflags[sv.currpc] - sv.src.ssaflags[sv.currpc] = (curr_flag & ~mask) | flag -end -function set_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32, mask::UInt32=typemax(UInt32)) - curr_flag = sv.ir.stmts[sv.curridx][:flag] - sv.ir.stmts[sv.curridx][:flag] = (curr_flag & ~mask) | flag + curr_flag = sv.ssaflags[sv.currpc] + sv.ssaflags[sv.currpc] = (curr_flag & ~mask) | flag + nothing end -add_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.src.ssaflags[sv.currpc] |= flag +add_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] |= flag add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = add_flag!(sv.ir.stmts[sv.curridx], flag) -sub_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.src.ssaflags[sv.currpc] &= ~flag +sub_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] &= ~flag sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = sub_flag!(sv.ir.stmts[sv.curridx], flag) function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects) @@ -1035,8 +1034,8 @@ function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects:: end merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return -decode_statement_effects_override(sv::AbsIntState) = - decode_statement_effects_override(get_curr_ssaflag(sv)) +decode_statement_effects_override(sv::InferenceState) = decode_statement_effects_override(sv.src.ssaflags[sv.currpc]) +decode_statement_effects_override(sv::IRInterpretationState) = decode_statement_effects_override(UInt32(0)) struct InferenceLoopState rt diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl index d2dfd26bfa00d..1c02bd67b5bd4 100644 --- a/Compiler/src/optimize.jl +++ b/Compiler/src/optimize.jl @@ -17,37 +17,41 @@ const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError const IR_FLAG_NULL = zero(UInt32) # This statement is marked as @inbounds by user. -# Ff replaced by inlining, any contained boundschecks may be removed. +# If replaced by inlining, any contained boundschecks may be removed. const IR_FLAG_INBOUNDS = one(UInt32) << 0 # This statement is marked as @inline by user const IR_FLAG_INLINE = one(UInt32) << 1 # This statement is marked as @noinline by user const IR_FLAG_NOINLINE = one(UInt32) << 2 -# An optimization pass has updated this statement in a way that may -# have exposed information that inference did not see. Re-running -# inference on this statement may be profitable. -const IR_FLAG_REFINED = one(UInt32) << 3 # This statement is proven :consistent -const IR_FLAG_CONSISTENT = one(UInt32) << 4 +const IR_FLAG_CONSISTENT = one(UInt32) << 3 # This statement is proven :effect_free -const IR_FLAG_EFFECT_FREE = one(UInt32) << 5 +const IR_FLAG_EFFECT_FREE = one(UInt32) << 4 # This statement is proven :nothrow -const IR_FLAG_NOTHROW = one(UInt32) << 6 -# This statement is proven :terminates -const IR_FLAG_TERMINATES = one(UInt32) << 7 -# This statement is proven :noub -const IR_FLAG_NOUB = one(UInt32) << 8 -# TODO: Both of these should eventually go away once -# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY -const IR_FLAG_EFIIMO = one(UInt32) << 9 -# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY -const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 10 +const IR_FLAG_NOTHROW = one(UInt32) << 5 +# This statement is proven :terminates_globally +const IR_FLAG_TERMINATES = one(UInt32) << 6 +#const IR_FLAG_TERMINATES_LOCALLY = one(UInt32) << 7 +#const IR_FLAG_NOTASKSTATE = one(UInt32) << 8 +#const IR_FLAG_INACCESSIBLEMEM = one(UInt32) << 9 +const IR_FLAG_NOUB = one(UInt32) << 10 +#const IR_FLAG_NOUBINIB = one(UInt32) << 11 +#const IR_FLAG_CONSISTENTOVERLAY = one(UInt32) << 12 # This statement is :nortcall -const IR_FLAG_NORTCALL = one(UInt32) << 11 +const IR_FLAG_NORTCALL = one(UInt32) << 13 +# An optimization pass has updated this statement in a way that may +# have exposed information that inference did not see. Re-running +# inference on this statement may be profitable. +const IR_FLAG_REFINED = one(UInt32) << 16 # This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE -const IR_FLAG_UNUSED = one(UInt32) << 12 +const IR_FLAG_UNUSED = one(UInt32) << 17 +# TODO: Both of these next two should eventually go away once +# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY +const IR_FLAG_EFIIMO = one(UInt32) << 18 +# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY +const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 19 -const NUM_IR_FLAGS = 13 # sync with julia.h +const NUM_IR_FLAGS = 3 # sync with julia.h const IR_FLAGS_EFFECTS = IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | @@ -815,6 +819,7 @@ function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState) sv.nortcall = false end end + nothing end function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState) @@ -1408,16 +1413,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes) return extyp === Union{} ? 0 : UNKNOWN_CALL_COST elseif head === :(=) - if ex.args[1] isa GlobalRef - cost = UNKNOWN_CALL_COST - else - cost = 0 - end - a = ex.args[2] - if a isa Expr - cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params)) - end - return cost + return statement_cost(ex.args[2], -1, src, sptypes, params) elseif head === :copyast return 100 end diff --git a/Compiler/src/ssair/EscapeAnalysis.jl b/Compiler/src/ssair/EscapeAnalysis.jl index 47a7840628bb5..4f32550d056b2 100644 --- a/Compiler/src/ssair/EscapeAnalysis.jl +++ b/Compiler/src/ssair/EscapeAnalysis.jl @@ -642,13 +642,6 @@ function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_e escape_invoke!(astate, pc, stmt.args) elseif head === :new || head === :splatnew escape_new!(astate, pc, stmt.args) - elseif head === :(=) - lhs, rhs = stmt.args - if isa(lhs, GlobalRef) # global store - add_escape_change!(astate, rhs, ⊤) - else - unexpected_assignment!(ir, pc) - end elseif head === :foreigncall escape_foreigncall!(astate, pc, stmt.args) elseif head === :throw_undef_if_not # XXX when is this expression inserted ? @@ -981,11 +974,6 @@ function escape_unanalyzable_obj!(astate::AnalysisState, @nospecialize(obj), obj return objinfo end -@noinline function unexpected_assignment!(ir::IRCode, pc::Int) - @eval Main (ir = $ir; pc = $pc) - error("unexpected assignment found: inspect `Main.pc` and `Main.pc`") -end - is_nothrow(ir::IRCode, pc::Int) = has_flag(ir[SSAValue(pc)], IR_FLAG_NOTHROW) # NOTE if we don't maintain the alias set that is separated from the lattice state, we can do diff --git a/Compiler/src/ssair/verify.jl b/Compiler/src/ssair/verify.jl index 59051058e1750..072a564a31f78 100644 --- a/Compiler/src/ssair/verify.jl +++ b/Compiler/src/ssair/verify.jl @@ -363,14 +363,8 @@ function verify_ir(ir::IRCode, print::Bool=true, isforeigncall = false if isa(stmt, Expr) if stmt.head === :(=) - if stmt.args[1] isa SSAValue - @verify_error "SSAValue as assignment LHS" - raise_error() - end - if stmt.args[2] isa GlobalRef - # undefined GlobalRef as assignment RHS is OK - continue - end + @verify_error "Assignment should have been removed during SSA conversion" + raise_error() elseif stmt.head === :isdefined if length(stmt.args) > 2 || (length(stmt.args) == 2 && !isa(stmt.args[2], Bool)) @verify_error "malformed isdefined" diff --git a/Compiler/src/stmtinfo.jl b/Compiler/src/stmtinfo.jl index 830bfa02d2d99..4f55f068e9456 100644 --- a/Compiler/src/stmtinfo.jl +++ b/Compiler/src/stmtinfo.jl @@ -268,6 +268,17 @@ end add_edges_impl(edges::Vector{Any}, info::UnionSplitApplyCallInfo) = for split in info.infos; add_edges!(edges, split); end +""" + info::InvokeCICallInfo + +Represents a resolved call to `Core.invoke` targeting a `Core.CodeInstance` +""" +struct InvokeCICallInfo <: CallInfo + edge::CodeInstance +end +add_edges_impl(edges::Vector{Any}, info::InvokeCICallInfo) = + add_inlining_edge!(edges, info.edge) + """ info::InvokeCallInfo diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl index 83ec0271ea474..20c0a5000bd39 100644 --- a/Compiler/src/typeinfer.jl +++ b/Compiler/src/typeinfer.jl @@ -413,6 +413,7 @@ function finishinfer!(me::InferenceState, interp::AbstractInterpreter) ipo_effects = result.ipo_effects = me.ipo_effects = adjust_effects(me) result.exc_result = me.exc_bestguess = refine_exception_type(me.exc_bestguess, ipo_effects) me.src.rettype = widenconst(ignorelimited(bestguess)) + me.src.ssaflags = me.ssaflags me.src.min_world = first(me.world.valid_worlds) me.src.max_world = last(me.world.valid_worlds) istoplevel = !(me.linfo.def isa Method) @@ -936,7 +937,7 @@ function codeinfo_for_const(interp::AbstractInterpreter, mi::MethodInstance, @no tree.slotflags = fill(0x00, nargs) tree.ssavaluetypes = 1 tree.debuginfo = DebugInfo(mi) - tree.ssaflags = UInt32[0] + tree.ssaflags = [IR_FLAG_NULL] tree.rettype = Core.Typeof(val) tree.edges = Core.svec() set_inlineable!(tree, true) diff --git a/Compiler/src/utilities.jl b/Compiler/src/utilities.jl index 29f3dfa4afd4a..da20f9aafbfb2 100644 --- a/Compiler/src/utilities.jl +++ b/Compiler/src/utilities.jl @@ -54,8 +54,8 @@ function count_const_size(@nospecialize(x), count_self::Bool = true) # No definite size (isa(x, GenericMemory) || isa(x, String) || isa(x, SimpleVector)) && return MAX_INLINE_CONST_SIZE + 1 - if isa(x, Module) || isa(x, Method) - # We allow modules and methods, because we already assume they are externally + if isa(x, Module) || isa(x, Method) || isa(x, CodeInstance) + # We allow modules, methods and CodeInstance, because we already assume they are externally # rooted, so we count their contents as 0 size. return sizeof(Ptr{Cvoid}) end diff --git a/Compiler/test/inference.jl b/Compiler/test/inference.jl index b3099897faf51..c8b599adb1323 100644 --- a/Compiler/test/inference.jl +++ b/Compiler/test/inference.jl @@ -3031,14 +3031,13 @@ end # issue #28279 # ensure that lowering doesn't move these into statement position, which would require renumbering -using Base: +, - -function f28279(b::Bool) +@eval function f28279(b::Bool) let i = 1 - while i > b - i -= 1 + while $(>)(i, b) + i = $(-)(i, 1) end if b end - return i + 1 + return $(+)(i, 1) end end code28279 = code_lowered(f28279, (Bool,))[1].code diff --git a/Compiler/test/inline.jl b/Compiler/test/inline.jl index 46b78db3b781c..5f95fb761859e 100644 --- a/Compiler/test/inline.jl +++ b/Compiler/test/inline.jl @@ -2111,7 +2111,7 @@ for run_finalizer_escape_test in (run_finalizer_escape_test1, run_finalizer_esca global finalizer_escape::Int = 0 let src = code_typed1(run_finalizer_escape_test, Tuple{Bool, Bool}) - @test any(x->isexpr(x, :(=)), src.code) + @test any(iscall((src, Core.setglobal!)), src.code) end let diff --git a/Compiler/test/interpreter_exec.jl b/Compiler/test/interpreter_exec.jl index 4972df1a27202..b1d450f8f4286 100644 --- a/Compiler/test/interpreter_exec.jl +++ b/Compiler/test/interpreter_exec.jl @@ -23,7 +23,7 @@ let m = Meta.@lower 1 + 1 ] nstmts = length(src.code) src.ssavaluetypes = nstmts - src.ssaflags = fill(UInt8(0x00), nstmts) + src.ssaflags = fill(zero(UInt32), nstmts) src.debuginfo = Core.DebugInfo(:none) Compiler.verify_ir(Compiler.inflate_ir(src)) global test29262 = true @@ -63,7 +63,7 @@ let m = Meta.@lower 1 + 1 ] nstmts = length(src.code) src.ssavaluetypes = nstmts - src.ssaflags = fill(UInt8(0x00), nstmts) + src.ssaflags = fill(zero(UInt32), nstmts) src.debuginfo = Core.DebugInfo(:none) m.args[1] = copy(src) Compiler.verify_ir(Compiler.inflate_ir(src)) @@ -103,7 +103,7 @@ let m = Meta.@lower 1 + 1 ] nstmts = length(src.code) src.ssavaluetypes = nstmts - src.ssaflags = fill(UInt8(0x00), nstmts) + src.ssaflags = fill(zero(UInt32), nstmts) src.debuginfo = Core.DebugInfo(:none) Compiler.verify_ir(Compiler.inflate_ir(src)) global test29262 = true diff --git a/NEWS.md b/NEWS.md index e8bbb00036de0..3c893566c7219 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,11 @@ New language features - atomic set once (`@atomiconce v[3] = 2`), - atomic swap (`x = @atomicswap v[3] = 2`), and - atomic replace (`x = @atomicreplace v[3] 2=>5`). +- New option `--task-metrics=yes` to enable the collection of per-task timing information, + which can also be enabled/disabled at runtime with `Base.Experimental.task_metrics(::Bool)`. ([#56320]) + The available metrics are: + - actual running time for the task (`Base.Experimental.task_running_time_ns`), and + - wall-time for the task (`Base.Experimental.task_wall_time_ns`). Language changes ---------------- @@ -31,7 +36,7 @@ Language changes may pave the way for inference to be able to intelligently re-use the old results, once the new method is deleted. ([#53415]) - - Macro expansion will no longer eagerly recurse into into `Expr(:toplevel)` + - Macro expansion will no longer eagerly recurse into `Expr(:toplevel)` expressions returned from macros. Instead, macro expansion of `:toplevel` expressions will be delayed until evaluation time. This allows a later expression within a given `:toplevel` expression to make use of macros @@ -55,11 +60,11 @@ Command-line option changes --------------------------- * The `-m/--module` flag can be passed to run the `main` function inside a package with a set of arguments. - This `main` function should be declared using `@main` to indicate that it is an entry point. + This `main` function should be declared using `@main` to indicate that it is an entry point. ([#52103]) * Enabling or disabling color text in Julia can now be controlled with the [`NO_COLOR`](https://no-color.org/) or [`FORCE_COLOR`](https://force-color.org/) environment variables. These variables are also honored by Julia's build system ([#53742], [#56346]). -* `--project=@temp` starts Julia with a temporary environment. +* `--project=@temp` starts Julia with a temporary environment. ([#51149]) * New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took to compile, in ms. ([#54662]) * `--trace-compile` now prints recompiled methods in yellow or with a trailing comment if color is not supported ([#55763]) @@ -72,7 +77,7 @@ Multi-threading changes a `OncePerProcess{T}` type, which allows defining a function that should be run exactly once the first time it is called, and then always return the same result value of type `T` every subsequent time afterwards. There are also `OncePerThread{T}` and `OncePerTask{T}` types for - similar usage with threads or tasks. ([#TBD]) + similar usage with threads or tasks. ([#55793]) Build system changes -------------------- @@ -86,32 +91,15 @@ New library functions * The new `isfull(c::Channel)` function can be used to check if `put!(c, some_value)` will block. ([#53159]) * `waitany(tasks; throw=false)` and `waitall(tasks; failfast=false, throw=false)` which wait multiple tasks at once ([#53341]). * `uuid7()` creates an RFC 9652 compliant UUID with version 7 ([#54834]). -* `insertdims(array; dims)` allows to insert singleton dimensions into an array which is the inverse operation to `dropdims` +* `insertdims(array; dims)` allows to insert singleton dimensions into an array which is the inverse operation to `dropdims`. ([#45793]) * The new `Fix` type is a generalization of `Fix1/Fix2` for fixing a single argument ([#54653]). New library features -------------------- -* `invmod(n, T)` where `T` is a native integer type now computes the modular inverse of `n` in the modular integer ring that `T` defines ([#52180]). -* `invmod(n)` is an abbreviation for `invmod(n, typeof(n))` for native integer types ([#52180]). -* `replace(string, pattern...)` now supports an optional `IO` argument to - write the output to a stream rather than returning a string ([#48625]). -* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]). -* New function `Docs.hasdoc(module, symbol)` tells whether a name has a docstring ([#52139]). -* New function `Docs.undocumented_names(module)` returns a module's undocumented public names ([#52413]). -* Passing an `IOBuffer` as a stdout argument for `Process` spawn now works as - expected, synchronized with `wait` or `success`, so a `Base.BufferStream` is - no longer required there for correctness to avoid data races ([#52461]). -* After a process exits, `closewrite` will no longer be automatically called on - the stream passed to it. Call `wait` on the process instead to ensure the - content is fully written, then call `closewrite` manually to avoid - data-races. Or use the callback form of `open` to have all that handled - automatically. -* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889]) * `escape_string` takes additional keyword arguments `ascii=true` (to escape all non-ASCII characters) and `fullhex=true` (to require full 4/8-digit hex numbers - for u/U escapes, e.g. for C compatibility) [#55099]). -* `filter` can now act on a `NamedTuple` ([#50795]). + for u/U escapes, e.g. for C compatibility) ([#55099]). * `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in the uniquing checking ([#53474]) * `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]) @@ -120,6 +108,8 @@ New library features * New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) * `isless` (and thus `cmp`, sorting, etc.) is now supported for zero-dimensional `AbstractArray`s ([#55772]) * `invoke` now supports passing a Method instead of a type signature making this interface somewhat more flexible for certain uncommon use cases ([#56692]). +* `invoke` now supports passing a CodeInstance instead of a type, which can enable +certain compiler plugin workflows ([#56660]). * `sort` now supports `NTuple`s ([#54494]) Standard library changes @@ -134,7 +124,7 @@ Standard library changes * A new standard library for applying syntax highlighting to Julia code, this uses `JuliaSyntax` and `StyledStrings` to implement a `highlight` function - that creates an `AnnotatedString` with syntax highlighting applied. + that creates an `AnnotatedString` with syntax highlighting applied. ([#51810]) #### Package Manager @@ -182,6 +172,7 @@ Standard library changes in the REPL will now issue a warning the first time occurs. ([#54872]) - When an object is printed automatically (by being returned in the REPL), its display is now truncated after printing 20 KiB. This does not affect manual calls to `show`, `print`, and so forth. ([#53959]) +- Backslash completions now print the respective glyph or emoji next to each matching backslash shortcode. ([#54800]) #### SuiteSparse diff --git a/base/boot.jl b/base/boot.jl index f66ee69780193..4badedae3cfb7 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -175,15 +175,33 @@ #end #mutable struct Task -# parent::Task +# next::Any +# queue::Any # storage::Any -# state::Symbol # donenotify::Any # result::Any -# exception::Any -# backtrace::Any # scope::Any # code::Any +# @atomic _state::UInt8 +# sticky::UInt8 +# priority::UInt16 +# @atomic _isexception::UInt8 +# pad00::UInt8 +# pad01::UInt8 +# pad02::UInt8 +# rngState0::UInt64 +# rngState1::UInt64 +# rngState2::UInt64 +# rngState3::UInt64 +# rngState4::UInt64 +# const metrics_enabled::Bool +# pad10::UInt8 +# pad11::UInt8 +# pad12::UInt8 +# @atomic first_enqueued_at::UInt64 +# @atomic last_started_running_at::UInt64 +# @atomic running_time_ns::UInt64 +# @atomic finished_at::UInt64 #end export diff --git a/base/deprecated.jl b/base/deprecated.jl index 84ef89e44b473..cffff05d954d1 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -4,7 +4,7 @@ # Instructions for Julia Core Developers: # 1. When making a breaking change that is known to be depnedet upon by an # important and closely coupled package, decide on a unique `change_name` -# for your PR and add it to the list below. In general, is is better to +# for your PR and add it to the list below. In general, it is better to # err on the side of caution and assign a `change_name` even if it is not # clear that it is required. `change_name`s may also be assigned after the # fact in a separate PR. (Note that this may cause packages to misbehave diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 5119ceaf2164a..c62c980fae1e1 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -2031,6 +2031,7 @@ applicable """ invoke(f, argtypes::Type, args...; kwargs...) invoke(f, argtypes::Method, args...; kwargs...) + invoke(f, argtypes::CodeInstance, args...; kwargs...) Invoke a method for the given generic function `f` matching the specified types `argtypes` on the specified arguments `args` and passing the keyword arguments `kwargs`. The arguments `args` must @@ -2056,6 +2057,22 @@ Note in particular that the specified `Method` may be entirely unreachable from If the method is part of the ordinary method table, this call behaves similar to `invoke(f, method.sig, args...)`. +!!! compat "Julia 1.12" + Passing a `Method` requires Julia 1.12. + +# Passing a `CodeInstance` instead of a signature +The `argtypes` argument may be a `CodeInstance`, bypassing both method lookup and specialization. +The semantics of this invocation are similar to a function pointer call of the `CodeInstance`'s +`invoke` pointer. It is an error to invoke a `CodeInstance` with arguments that do not match its +parent `MethodInstance` or from a world age not included in the `min_world`/`max_world` range. +It is undefined behavior to invoke a `CodeInstance` whose behavior does not match the constraints +specified in its fields. For some code instances with `owner !== nothing` (i.e. those generated +by external compilers), it may be an error to invoke them after passing through precompilation. +This is an advanced interface intended for use with external compiler plugins. + +!!! compat "Julia 1.12" + Passing a `CodeInstance` requires Julia 1.12. + # Examples ```jldoctest julia> f(x::Real) = x^2; diff --git a/base/experimental.jl b/base/experimental.jl index 411bb2407cdc5..17871b4f346d6 100644 --- a/base/experimental.jl +++ b/base/experimental.jl @@ -503,4 +503,78 @@ usage, by eliminating the tracking of those possible invalidation. """ disable_new_worlds() = ccall(:jl_disable_new_worlds, Cvoid, ()) +### Task metrics + +""" + Base.Experimental.task_metrics(::Bool) + +Enable or disable the collection of per-task metrics. +A `Task` created when `Base.Experimental.task_metrics(true)` is in effect will have +[`Base.Experimental.task_running_time_ns`](@ref) and [`Base.Experimental.task_wall_time_ns`](@ref) +timing information available. + +!!! note + Task metrics can be enabled at start-up via the `--task-metrics=yes` command line option. +""" +function task_metrics(b::Bool) + if b + ccall(:jl_task_metrics_enable, Cvoid, ()) + else + ccall(:jl_task_metrics_disable, Cvoid, ()) + end + return nothing end + +""" + Base.Experimental.task_running_time_ns(t::Task) -> Union{UInt64, Nothing} + +Return the total nanoseconds that the task `t` has spent running. +This metric is only updated when `t` yields or completes unless `t` is the current task, in +which it will be updated continuously. +See also [`Base.Experimental.task_wall_time_ns`](@ref). + +Returns `nothing` if task timings are not enabled. +See [`Base.Experimental.task_metrics`](@ref). + +!!! note "This metric is from the Julia scheduler" + A task may be running on an OS thread that is descheduled by the OS + scheduler, this time still counts towards the metric. + +!!! compat "Julia 1.12" + This method was added in Julia 1.12. +""" +function task_running_time_ns(t::Task=current_task()) + t.metrics_enabled || return nothing + if t == current_task() + # These metrics fields can't update while we're running. + # But since we're running we need to include the time since we last started running! + return t.running_time_ns + (time_ns() - t.last_started_running_at) + else + return t.running_time_ns + end +end + +""" + Base.Experimental.task_wall_time_ns(t::Task) -> Union{UInt64, Nothing} + +Return the total nanoseconds that the task `t` was runnable. +This is the time since the task first entered the run queue until the time at which it +completed, or until the current time if the task has not yet completed. +See also [`Base.Experimental.task_running_time_ns`](@ref). + +Returns `nothing` if task timings are not enabled. +See [`Base.Experimental.task_metrics`](@ref). + +!!! compat "Julia 1.12" + This method was added in Julia 1.12. +""" +function task_wall_time_ns(t::Task=current_task()) + t.metrics_enabled || return nothing + start_at = t.first_enqueued_at + start_at == 0 && return UInt64(0) + end_at = t.finished_at + end_at == 0 && return time_ns() - start_at + return end_at - start_at +end + +end # module diff --git a/base/optimized_generics.jl b/base/optimized_generics.jl index 86b54a294564d..6b1d146b6172b 100644 --- a/base/optimized_generics.jl +++ b/base/optimized_generics.jl @@ -54,4 +54,31 @@ module KeyValue function get end end +# Compiler-recognized intrinsics for compiler plugins +""" + module CompilerPlugins + +Implements a pair of functions `typeinf`/`typeinf_edge`. When the optimizer sees +a call to `typeinf`, it has license to instead call `typeinf_edge`, supplying the +current inference stack in `parent_frame` (but otherwise supplying the arguments +to `typeinf`). `typeinf_edge` will return the `CodeInstance` that `typeinf` would +have returned at runtime. The optimizer may perform a non-IPO replacement of +the call to `typeinf` by the result of `typeinf_edge`. In addition, the IPO-safe +fields of the `CodeInstance` may be propagated in IPO mode. +""" +module CompilerPlugins + """ + typeinf(owner, mi, source_mode)::CodeInstance + + Return a `CodeInstance` for the given `mi` whose valid results include at + the least current tls world and satisfies the requirements of `source_mode`. + """ + function typeinf end + + """ + typeinf_edge(owner, mi, parent_frame, world, abi_mode)::CodeInstance + """ + function typeinf_edge end +end + end diff --git a/base/options.jl b/base/options.jl index 07baa3b51f65b..7e7808bd5c047 100644 --- a/base/options.jl +++ b/base/options.jl @@ -61,6 +61,7 @@ struct JLOptions heap_size_hint::UInt64 trace_compile_timing::Int8 trim::Int8 + task_metrics::Int8 end # This runs early in the sysimage != is not defined yet diff --git a/base/pointer.jl b/base/pointer.jl index b1580ef17d562..de2f413d8f881 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -169,7 +169,7 @@ The `unsafe` prefix on this function indicates that no validation is performed o pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program. Unlike C, storing memory region allocated as -different type may be valid provided that that the types are compatible. +different type may be valid provided that the types are compatible. !!! compat "Julia 1.10" The `order` argument is available as of Julia 1.10. diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index 07f608588837b..f65a7d8c9561a 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -121,37 +121,56 @@ reshape reshape(parent::AbstractArray, dims::IntOrInd...) = reshape(parent, dims) reshape(parent::AbstractArray, shp::Tuple{Union{Integer,OneTo}, Vararg{Union{Integer,OneTo}}}) = reshape(parent, to_shape(shp)) +reshape(parent::AbstractArray, dims::Tuple{Integer, Vararg{Integer}}) = reshape(parent, map(Int, dims)) reshape(parent::AbstractArray, dims::Dims) = _reshape(parent, dims) # Allow missing dimensions with Colon(): reshape(parent::AbstractVector, ::Colon) = parent reshape(parent::AbstractVector, ::Tuple{Colon}) = parent reshape(parent::AbstractArray, dims::Int...) = reshape(parent, dims) -reshape(parent::AbstractArray, dims::Union{Int,Colon}...) = reshape(parent, dims) -reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims)) -@inline function _reshape_uncolon(A, dims) - @noinline throw1(dims) = throw(DimensionMismatch(string("new dimensions $(dims) ", - "may have at most one omitted dimension specified by `Colon()`"))) - @noinline throw2(A, dims) = throw(DimensionMismatch(string("array size $(length(A)) ", - "must be divisible by the product of the new dimensions $dims"))) - pre = _before_colon(dims...)::Tuple{Vararg{Int}} +reshape(parent::AbstractArray, dims::Integer...) = reshape(parent, dims) +reshape(parent::AbstractArray, dims::Union{Integer,Colon}...) = reshape(parent, dims) +reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Integer,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims)) + +@noinline throw1(dims) = throw(DimensionMismatch(LazyString("new dimensions ", dims, + " may have at most one omitted dimension specified by `Colon()`"))) +@noinline throw2(lenA, dims) = throw(DimensionMismatch(string("array size ", lenA, + " must be divisible by the product of the new dimensions ", dims))) + +@inline function _reshape_uncolon(A, _dims::Tuple{Vararg{Union{Integer, Colon}}}) + # promote the dims to `Int` at least + dims = map(x -> x isa Colon ? x : promote_type(typeof(x), Int)(x), _dims) + pre = _before_colon(dims...) post = _after_colon(dims...) _any_colon(post...) && throw1(dims) - post::Tuple{Vararg{Int}} len = length(A) - sz, is_exact = if iszero(len) - (0, true) + _reshape_uncolon_computesize(len, dims, pre, post) +end +@inline function _reshape_uncolon_computesize(len::Int, dims, pre::Tuple{Vararg{Int}}, post::Tuple{Vararg{Int}}) + sz = if iszero(len) + 0 else let pr = Core.checked_dims(pre..., post...) # safe product - if iszero(pr) - throw2(A, dims) - end - (quo, rem) = divrem(len, pr) - (Int(quo), iszero(rem)) + quo = _reshape_uncolon_computesize_nonempty(len, dims, pr) + convert(Int, quo) end - end::Tuple{Int,Bool} - is_exact || throw2(A, dims) - (pre..., sz, post...)::Tuple{Int,Vararg{Int}} + end + (pre..., sz, post...) +end +@inline function _reshape_uncolon_computesize(len, dims, pre, post) + pr = prod((pre..., post...)) + sz = if iszero(len) + promote(len, pr)[1] # zero of the correct type + else + _reshape_uncolon_computesize_nonempty(len, dims, pr) + end + (pre..., sz, post...) +end +@inline function _reshape_uncolon_computesize_nonempty(len, dims, pr) + iszero(pr) && throw2(len, dims) + (quo, rem) = divrem(len, pr) + iszero(rem) || throw2(len, dims) + quo end @inline _any_colon() = false @inline _any_colon(dim::Colon, tail...) = true diff --git a/base/show.jl b/base/show.jl index 23957d6e29b2d..cb36488b92bc1 100644 --- a/base/show.jl +++ b/base/show.jl @@ -2821,7 +2821,7 @@ function show(io::IO, vm::Core.TypeofVararg) end end -Compiler.include(Compiler.IRShow, "ssair/show.jl") # define `show` for the compiler types +Compiler.load_irshow!() const IRShow = Compiler.IRShow # an alias for compatibility function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index c5c330fe0dfcd..814ee2afa9d55 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -55,9 +55,9 @@ like [`string`](@ref) but preserves any annotations present in the arguments. # Examples -```julia-repl +```jldoctest; setup=:(using Base: AnnotatedString) julia> AnnotatedString("this is an example annotated string", - [(1:18, :A => 1), (12:28, :B => 2), (18:35, :C => 3)]) + [(1:18, :A, 1), (12:28, :B, 2), (18:35, :C, 3)]) "this is an example annotated string" ``` """ @@ -87,8 +87,8 @@ AnnotatedChar(s::S, annotations::Vector{$Annotation}) # Examples -```julia-repl -julia> AnnotatedChar('j', :label => 1) +```jldoctest; setup=:(using Base: AnnotatedChar) +julia> AnnotatedChar('j', [(:label, 1)]) 'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase) ``` """ @@ -232,11 +232,11 @@ See also [`AnnotatedString`](@ref) and [`AnnotatedChar`](@ref). ## Examples -```julia-repl +```jldoctest; setup=:(using Base: AnnotatedString, annotatedstring) julia> annotatedstring("now a AnnotatedString") "now a AnnotatedString" -julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label => 1)]), ", and unannotated") +julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label, 1)]), ", and unannotated") "annotated, and unannotated" ``` """ @@ -344,7 +344,7 @@ end annotate!(str::AnnotatedString, [range::UnitRange{Int}], label::Symbol, value) annotate!(str::SubString{AnnotatedString}, [range::UnitRange{Int}], label::Symbol, value) -Annotate a `range` of `str` (or the entire string) with a labeled value (`label` => `value`). +Annotate a `range` of `str` (or the entire string) with a labeled value `(label, value)`. To remove existing `label` annotations, use a value of `nothing`. The order in which annotations are applied to `str` is semantically meaningful, @@ -365,7 +365,7 @@ annotate!(s::SubString{<:AnnotatedString}, label::Symbol, @nospecialize(val::Any """ annotate!(char::AnnotatedChar, label::Symbol, value::Any) -Annotate `char` with the pair `label => value`. +Annotate `char` with the labeled value `(label, value)`. """ annotate!(c::AnnotatedChar, label::Symbol, @nospecialize(val::Any)) = (push!(c.annotations, Annotation((; label, val))); c) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index ad047514c85a6..fcb4a371e9898 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -534,11 +534,17 @@ iscntrl(c::AbstractChar) = c <= '\x1f' || '\x7f' <= c <= '\u9f' Tests whether a character belongs to the Unicode general category Punctuation, i.e. a character whose category code begins with 'P'. +!!! note + This behavior is different from the `ispunct` function in C. + # Examples ```jldoctest julia> ispunct('α') false +julia> ispunct('=') +false + julia> ispunct('/') true diff --git a/base/sysimg.jl b/base/sysimg.jl index e57ec1c99bfe6..42f54a849f157 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Can be be loaded on top of either an existing system image built from +# Can be loaded on top of either an existing system image built from # `Base_compiler.jl` or standalone, in which case we will build it now. let had_compiler = isdefined(Main, :Base) if had_compiler; else diff --git a/base/task.jl b/base/task.jl index 2a922c4b85f24..951e980ee903c 100644 --- a/base/task.jl +++ b/base/task.jl @@ -977,7 +977,11 @@ function enq_work(t::Task) return t end -schedule(t::Task) = enq_work(t) +function schedule(t::Task) + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) + enq_work(t) +end """ schedule(t::Task, [val]; error=false) @@ -1031,6 +1035,8 @@ function schedule(t::Task, @nospecialize(arg); error=false) t.queue === nothing || Base.error("schedule: Task not runnable") setfield!(t, :result, arg) end + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) enq_work(t) return t end @@ -1064,11 +1070,15 @@ immediately yields to `t` before calling the scheduler. Throws a `ConcurrencyViolationError` if `t` is the currently running task. """ function yield(t::Task, @nospecialize(x=nothing)) - current = current_task() - t === current && throw(ConcurrencyViolationError("Cannot yield to currently running task!")) + ct = current_task() + t === ct && throw(ConcurrencyViolationError("Cannot yield to currently running task!")) (t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable")) + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) t.result = x - enq_work(current) + enq_work(ct) set_next_task(t) return try_yieldto(ensure_rescheduled) end @@ -1082,6 +1092,7 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid or scheduling in any way. Its use is discouraged. """ function yieldto(t::Task, @nospecialize(x=nothing)) + ct = current_task() # TODO: these are legacy behaviors; these should perhaps be a scheduler # state error instead. if t._state === task_state_done @@ -1089,6 +1100,10 @@ function yieldto(t::Task, @nospecialize(x=nothing)) elseif t._state === task_state_failed throw(t.result) end + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-unfairly-> wait_time + maybe_record_enqueued!(t) t.result = x set_next_task(t) return try_yieldto(identity) @@ -1102,6 +1117,10 @@ function try_yieldto(undo) rethrow() end ct = current_task() + # [task] wait_time -(re)started-> user_time + if ct.metrics_enabled + @atomic :monotonic ct.last_started_running_at = time_ns() + end if ct._isexception exc = ct.result ct.result = nothing @@ -1115,6 +1134,11 @@ end # yield to a task, throwing an exception in it function throwto(t::Task, @nospecialize exc) + ct = current_task() + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-unfairly-> wait_time + maybe_record_enqueued!(t) t.result = exc t._isexception = true set_next_task(t) @@ -1167,6 +1191,9 @@ checktaskempty = Partr.multiq_check_empty end function wait() + ct = current_task() + # [task] user_time -yield-or-done-> wait_time + record_running_time!(ct) GC.safepoint() W = workqueue_for(Threads.threadid()) poptask(W) @@ -1181,3 +1208,21 @@ if Sys.iswindows() else pause() = ccall(:pause, Cvoid, ()) end + +# update the `running_time_ns` field of `t` to include the time since it last started running. +function record_running_time!(t::Task) + if t.metrics_enabled && !istaskdone(t) + @atomic :monotonic t.running_time_ns += time_ns() - t.last_started_running_at + end + return t +end + +# if this is the first time `t` has been added to the run queue +# (or the first time it has been unfairly yielded to without being added to the run queue) +# then set the `first_enqueued_at` field to the current time. +function maybe_record_enqueued!(t::Task) + if t.metrics_enabled && t.first_enqueued_at == 0 + @atomic :monotonic t.first_enqueued_at = time_ns() + end + return t +end diff --git a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 b/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 deleted file mode 100644 index e55e74562d717..0000000000000 --- a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -20d63322fc5b547d4c2464c27e9a6a0e diff --git a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 b/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 deleted file mode 100644 index 5094dddb8142a..0000000000000 --- a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -93dd178af474c76cce9368416d34570b66cc44c7c311e4dc14569d3f9deed70afcae8a2b1976535ed0732ed305c6d8d1b0ef04cbeeaa3af2891e97650d51467d diff --git a/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 new file mode 100644 index 0000000000000..f5f87b3f2fa9e --- /dev/null +++ b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 @@ -0,0 +1 @@ +1a5c995237815e0d7d5ee1ec50006c1c diff --git a/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 new file mode 100644 index 0000000000000..839cd4704a135 --- /dev/null +++ b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 @@ -0,0 +1 @@ +2e0b984c8272fe4468e0b527698a58d5010ef3f18d38d862665902e5a0e0b7ba65d3085b3d9de367a7b48a216e71d1611687804254503b3905b7b4d217a00f2f diff --git a/deps/tools/jlchecksum b/deps/tools/jlchecksum index 329d3a2a845d4..9945ec89e6bda 100755 --- a/deps/tools/jlchecksum +++ b/deps/tools/jlchecksum @@ -63,7 +63,7 @@ find_checksum() fi done if [ ! -f "$DEPSDIR/checksums/$BASENAME/$CHECKSUM_TYPE" ]; then - if [ ${TAGGED_RELEASE_BANNER:-} ]; then + if [ "${TAGGED_RELEASE_BANNER:-}" ]; then echo "WARNING: $CHECKSUM_TYPE checksum for $BASENAME not found in deps/checksums/, failing release build." >&2 exit 3 fi diff --git a/doc/man/julia.1 b/doc/man/julia.1 index 56cb690d66eeb..2da11ae1b3f18 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -294,6 +294,10 @@ If --trace-compile is enabled show how long each took to compile in ms --trace-dispatch={stderr|name} Print precompile statements for methods dispatched during execution or save to stderr or a path. +.TP +--task-metrics={yes|no*} +Enable the collection of per-task metrics. + .TP -image-codegen Force generate code in imaging mode diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md index 9e3bc49acf6dc..81d1d83d765ac 100644 --- a/doc/src/base/multi-threading.md +++ b/doc/src/base/multi-threading.md @@ -65,3 +65,11 @@ These building blocks are used to create the regular synchronization objects. ```@docs Base.Threads.SpinLock ``` + +## Task metrics (Experimental) + +```@docs +Base.Experimental.task_metrics +Base.Experimental.task_running_time_ns +Base.Experimental.task_wall_time_ns +``` diff --git a/doc/src/base/reflection.md b/doc/src/base/reflection.md index 9228fb38322df..d88c3c8b0d0cf 100644 --- a/doc/src/base/reflection.md +++ b/doc/src/base/reflection.md @@ -100,10 +100,12 @@ as assignments, branches, and calls: ```jldoctest; setup = (using Base: +, sin) julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] )) :($(Expr(:thunk, CodeInfo( -1 ─ %1 = dynamic 1 + 2 -│ %2 = dynamic sin(0.5) -│ %3 = dynamic Base.vect(%1, %2) -└── return %3 +1 ─ %1 = :+ +│ %2 = dynamic (%1)(1, 2) +│ %3 = sin +│ %4 = dynamic (%3)(0.5) +│ %5 = dynamic Base.vect(%2, %4) +└── return %5 )))) ``` diff --git a/doc/src/devdocs/build/build.md b/doc/src/devdocs/build/build.md index 553f7c2e815cf..966ef3d102d1c 100644 --- a/doc/src/devdocs/build/build.md +++ b/doc/src/devdocs/build/build.md @@ -249,7 +249,7 @@ The most complicated dependency is LLVM, for which we require additional patches For packaging Julia with LLVM, we recommend either: - bundling a Julia-only LLVM library inside the Julia package, or - adding the patches to the LLVM package of the distribution. - * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/15.x` branch. + * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/18.x` branch. * The only Julia-specific patch is the lib renaming (`llvm7-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM. * The remaining patches are all upstream bug fixes, and have been contributed into upstream LLVM. diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 734d7031db5e8..9b06deaf0ea8a 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -203,6 +203,7 @@ The following is a complete list of command-line switches available when launchi |`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).| |`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")| |`--track-allocation=@` |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.| +|`--task-metrics={yes\|no*}` |Enable the collection of per-task metrics| |`--bug-report=KIND` |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.| |`--heap-size-hint=` |Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of KB, MB, GB, or TB, or as a percentage of physical memory with %.| |`--compile={yes*\|no\|all\|min}` |Enable or disable JIT compiler, or request exhaustive or minimal compilation| diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp index e73399c2ecde4..86b0bdb27638b 100644 --- a/src/APInt-C.cpp +++ b/src/APInt-C.cpp @@ -475,7 +475,6 @@ void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerP memcpy(pr, pa, onumbytes); } -#if JL_LLVM_VERSION >= 170000 extern "C" JL_DLLEXPORT unsigned countr_zero_8(uint8_t Val) { return countr_zero(Val); @@ -495,27 +494,6 @@ extern "C" JL_DLLEXPORT unsigned countr_zero_64(uint64_t Val) { return countr_zero(Val); } -#else -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_8(uint8_t Val) { - return countTrailingZeros(Val); -} - -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_16(uint16_t Val) { - return countTrailingZeros(Val); -} - -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_32(uint32_t Val) { - return countTrailingZeros(Val); -} - -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_64(uint64_t Val) { - return countTrailingZeros(Val); -} -#endif extern "C" JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr) { @@ -545,7 +523,6 @@ void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integer memcpy(pr, pa, numbytes); } -#if JL_LLVM_VERSION >= 170000 extern "C" JL_DLLEXPORT unsigned LLVMPopcount(unsigned numbits, integerPart *pa) { CREATE(a) @@ -575,34 +552,3 @@ unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa) { CREATE(a) return a.countl_zero(); } -#else -extern "C" JL_DLLEXPORT -unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countPopulation(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countTrailingOnes(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countTrailingZeros(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countLeadingOnes(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countLeadingZeros(); -} -#endif diff --git a/src/APInt-C.h b/src/APInt-C.h index a44d922a40d24..59ce3c765eeec 100644 --- a/src/APInt-C.h +++ b/src/APInt-C.h @@ -54,19 +54,11 @@ JL_DLLEXPORT int LLVMDiv_uov(unsigned numbits, integerPart *pa, integerPart *pb, JL_DLLEXPORT int LLVMRem_sov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); JL_DLLEXPORT int LLVMRem_uov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); -#if JL_LLVM_VERSION >= 170000 JL_DLLEXPORT unsigned LLVMPopcount(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountr_one(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountl_one(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa); -#else -JL_DLLEXPORT unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa); -#endif JL_DLLEXPORT void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); JL_DLLEXPORT void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); @@ -82,17 +74,10 @@ JL_DLLEXPORT int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatyp JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); JL_DLLEXPORT void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); -#if JL_LLVM_VERSION >= 170000 JL_DLLEXPORT unsigned countr_zero_8(uint8_t Val); JL_DLLEXPORT unsigned countr_zero_16(uint16_t Val); JL_DLLEXPORT unsigned countr_zero_32(uint32_t Val); JL_DLLEXPORT unsigned countr_zero_64(uint64_t Val); -#else -JL_DLLEXPORT unsigned countTrailingZeros_8(uint8_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_16(uint16_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_32(uint32_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_64(uint64_t Val); -#endif //uint8_t getSwappedBytes_8(uint8_t Value); // no-op //uint16_t getSwappedBytes_16(uint16_t Value); diff --git a/src/Makefile b/src/Makefile index 3458f51fa5548..9355ca2c4c675 100644 --- a/src/Makefile +++ b/src/Makefile @@ -77,11 +77,7 @@ else # JULIACODEGEN != LLVM endif -RT_LLVM_LIBS := support - -ifeq ($(shell test $(LLVM_VER_MAJ) -ge 16 && echo true),true) -RT_LLVM_LIBS += targetparser -endif +RT_LLVM_LIBS := support targetparser ifeq ($(OS),WINNT) SRCS += win32_ucontext diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 1d1e48efc8c6c..ba9a3c05e41ff 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -4,11 +4,7 @@ #include "platform.h" // target support -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include "llvm/Support/CodeGen.h" #include #include @@ -963,11 +959,7 @@ static FunctionInfo getFunctionWeight(const Function &F) auto val = F.getFnAttribute("julia.mv.clones").getValueAsString(); // base16, so must be at most 4 * length bits long // popcount gives number of clones - #if JL_LLVM_VERSION >= 170000 info.clones = APInt(val.size() * 4, val, 16).popcount() + 1; - #else - info.clones = APInt(val.size() * 4, val, 16).countPopulation() + 1; - #endif } info.weight += info.insts; // more basic blocks = more complex than just sum of insts, @@ -1372,7 +1364,7 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer // So for now we inject a definition of these functions that calls our runtime // functions. We do so after optimization to avoid cloning these functions. // Float16 conversion routines -#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) && JL_LLVM_VERSION >= 160000 +#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9 injectCRTAlias(M, "__gnu_h2f_ieee", "julia_half_to_float", @@ -1721,9 +1713,6 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri std::function func = [&, i]() { LLVMContext ctx; ctx.setDiscardValueNames(true); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(ctx); - #endif // Lazily deserialize the entire module timers[i].deserialize.startTimer(); auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx); @@ -1888,7 +1877,7 @@ void jl_dump_native_impl(void *native_code, Str += "10.14.0"; TheTriple.setOSName(Str); } - Optional RelocModel; + std::optional RelocModel; if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD() || TheTriple.isOSOpenBSD()) { RelocModel = Reloc::PIC_; } @@ -1933,9 +1922,6 @@ void jl_dump_native_impl(void *native_code, JL_TIMING(NATIVE_AOT, NATIVE_Sysimg); LLVMContext Context; Context.setDiscardValueNames(true); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(Context); - #endif Module sysimgM("sysimg", Context); sysimgM.setTargetTriple(TheTriple.str()); sysimgM.setDataLayout(DL); @@ -2081,9 +2067,6 @@ void jl_dump_native_impl(void *native_code, JL_TIMING(NATIVE_AOT, NATIVE_Metadata); LLVMContext Context; Context.setDiscardValueNames(true); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(Context); - #endif Module metadataM("metadata", Context); metadataM.setTargetTriple(TheTriple.str()); metadataM.setDataLayout(DL); @@ -2299,16 +2282,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ } else { auto p = literal_static_pointer_val(global.first, global.second->getValueType()); - #if JL_LLVM_VERSION >= 170000 Type *elty = PointerType::get(output.getContext(), 0); - #else - Type *elty; - if (p->getType()->isOpaquePointerTy()) { - elty = PointerType::get(output.getContext(), 0); - } else { - elty = p->getType()->getNonOpaquePointerElementType(); - } - #endif // For pretty printing, when LLVM inlines the global initializer into its loads auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent()); global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType())); diff --git a/src/ast.c b/src/ast.c index 474c0661f5230..959bac26c2c15 100644 --- a/src/ast.c +++ b/src/ast.c @@ -181,42 +181,6 @@ static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint return (bpart != NULL && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL) ? fl_ctx->T : fl_ctx->F; } -static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) -{ - // tells whether a var is defined, in the sense that accessing it is nothrow - // can take either a symbol or a module and a symbol - jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx); - jl_module_t *mod = ctx->module; - jl_sym_t *var = NULL; - if (nargs == 1) { - (void)tosymbol(fl_ctx, args[0], "nothrow-julia-global"); - var = scmsym_to_julia(fl_ctx, args[0]); - } - else { - argcount(fl_ctx, "nothrow-julia-global", nargs, 2); - value_t argmod = args[0]; - if (iscvalue(argmod) && cv_class((cvalue_t*)ptr(argmod)) == jl_ast_ctx(fl_ctx)->jvtype) { - mod = *(jl_module_t**)cv_data((cvalue_t*)ptr(argmod)); - JL_GC_PROMISE_ROOTED(mod); - } else { - if (!iscons(argmod) || !issymbol(car_(argmod)) || scmsym_to_julia(fl_ctx, car_(argmod)) != jl_thismodule_sym) { - lerrorf(fl_ctx, fl_ctx->ArgError, "nothrow-julia-global: Unknown globalref module kind"); - } - } - (void)tosymbol(fl_ctx, args[1], "nothrow-julia-global"); - var = scmsym_to_julia(fl_ctx, args[1]); - } - jl_binding_t *b = jl_get_module_binding(mod, var, 0); - jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); - jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); - if (!bpart) - return fl_ctx->F; - if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) - return fl_ctx->F; - return (jl_bkind_is_some_constant(decode_restriction_kind(pku)) ? - decode_restriction_value(pku) : jl_atomic_load_relaxed(&b->value)) != NULL ? fl_ctx->T : fl_ctx->F; -} - // Used to generate a unique suffix for a given symbol (e.g. variable or type name) // first argument contains a stack of method definitions seen so far by `closure-convert` in flisp. // if the top of the stack is non-NIL, we use it to augment the suffix so that it becomes @@ -288,7 +252,6 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m static const builtinspec_t julia_flisp_ast_ext[] = { { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint - { "nothrow-julia-global", fl_nothrow_julia_global }, { "current-julia-module-counter", fl_module_unique_name }, { "julia-scalar?", fl_julia_scalar }, { NULL, NULL } diff --git a/src/builtins.c b/src/builtins.c index c6b0bf130550b..4b9c1ad6043c2 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1587,6 +1587,30 @@ JL_CALLABLE(jl_f_invoke) if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)m->sig)) jl_type_error("invoke: argument type error", argtypes, arg_tuple(args[0], &args[2], nargs - 1)); return jl_gf_invoke_by_method(m, args[0], &args[2], nargs - 1); + } else if (jl_is_code_instance(argtypes)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)args[1]; + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + // N.B.: specTypes need not be a subtype of the method signature. We need to check both. + if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)codeinst->def->specTypes) || + (jl_is_method(codeinst->def->def.value) && !jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)codeinst->def->def.method->sig))) { + jl_type_error("invoke: argument type error", codeinst->def->specTypes, arg_tuple(args[0], &args[2], nargs - 1)); + } + if (jl_atomic_load_relaxed(&codeinst->min_world) > jl_current_task->world_age || + jl_current_task->world_age > jl_atomic_load_relaxed(&codeinst->max_world)) { + jl_error("invoke: CodeInstance not valid for this world"); + } + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + return invoke(args[0], &args[2], nargs - 2, codeinst); + } else { + if (codeinst->owner != jl_nothing) { + jl_error("Failed to invoke or compile external codeinst"); + } + return jl_invoke(args[0], &args[2], nargs - 1, codeinst->def); + } } if (!jl_is_tuple_type(jl_unwrap_unionall(argtypes))) jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, argtypes); diff --git a/src/ccall.cpp b/src/ccall.cpp index 52f8f807132e5..9aa1c56e2e78f 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -264,9 +264,6 @@ static GlobalVariable *emit_plt_thunk( SmallVector args; for (auto &arg : plt->args()) args.push_back(&arg); - #if JL_LLVM_VERSION < 170000 - assert(cast(ptr->getType())->isOpaqueOrPointeeTypeMatches(functype)); - #endif CallInst *ret = irbuilder.CreateCall( functype, ptr, ArrayRef(args)); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7d4bd917eff30..9e170555e6149 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -60,11 +60,7 @@ static Value *decay_derived(jl_codectx_t &ctx, Value *V) if (T->getPointerAddressSpace() == AddressSpace::Derived) return V; // Once llvm deletes pointer element types, we won't need it here any more either. - #if JL_LLVM_VERSION >= 170000 Type *NewT = PointerType::get(T, AddressSpace::Derived); - #else - Type *NewT = PointerType::getWithSamePointeeType(cast(T), AddressSpace::Derived); - #endif return ctx.builder.CreateAddrSpaceCast(V, NewT); } @@ -74,11 +70,7 @@ static Value *maybe_decay_tracked(jl_codectx_t &ctx, Value *V) Type *T = V->getType(); if (T->getPointerAddressSpace() != AddressSpace::Tracked) return V; - #if JL_LLVM_VERSION >= 170000 Type *NewT = PointerType::get(T, AddressSpace::Derived); - #else - Type *NewT = PointerType::getWithSamePointeeType(cast(T), AddressSpace::Derived); - #endif return ctx.builder.CreateAddrSpaceCast(V, NewT); } @@ -1010,54 +1002,6 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const { if (sz == 0) return; - #if JL_LLVM_VERSION < 170000 - // If the types are small and simple, use load and store directly. - // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int - // that interferes with other optimizations. - // TODO: Restore this for opaque pointers? Needs extra type information from the caller. - if (ctx.builder.getContext().supportsTypedPointers() && sz <= 64) { - // The size limit is arbitrary but since we mainly care about floating points and - // machine size vectors this should be enough. - const DataLayout &DL = jl_Module->getDataLayout(); - auto srcty = cast(src->getType()); - //TODO unsafe nonopaque pointer - auto srcel = srcty->getNonOpaquePointerElementType(); - auto dstty = cast(dst->getType()); - //TODO unsafe nonopaque pointer - auto dstel = dstty->getNonOpaquePointerElementType(); - while (srcel->isArrayTy() && srcel->getArrayNumElements() == 1) { - src = ctx.builder.CreateConstInBoundsGEP2_32(srcel, src, 0, 0); - srcel = srcel->getArrayElementType(); - srcty = srcel->getPointerTo(); - } - while (dstel->isArrayTy() && dstel->getArrayNumElements() == 1) { - dst = ctx.builder.CreateConstInBoundsGEP2_32(dstel, dst, 0, 0); - dstel = dstel->getArrayElementType(); - dstty = dstel->getPointerTo(); - } - - llvm::Type *directel = nullptr; - if (srcel->isSized() && srcel->isSingleValueType() && DL.getTypeStoreSize(srcel) == sz) { - directel = srcel; - dst = emit_bitcast(ctx, dst, srcty); - } - else if (dstel->isSized() && dstel->isSingleValueType() && - DL.getTypeStoreSize(dstel) == sz) { - directel = dstel; - src = emit_bitcast(ctx, src, dstty); - } - if (directel) { - if (isa(src) && !src->hasName()) - setName(ctx.emission_context, src, "memcpy_refined_src"); - if (isa(dst) && !dst->hasName()) - setName(ctx.emission_context, dst, "memcpy_refined_dst"); - auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, MaybeAlign(align_src), is_volatile)); - dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, align_dst, is_volatile)); - ++SkippedMemcpys; - return; - } - } - #endif ++EmittedMemcpys; // the memcpy intrinsic does not allow to specify different alias tags @@ -1940,7 +1884,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // actual `isa` calls, this optimization should already have been performed upstream // anyway, but having this optimization in codegen might still be beneficial for // `typeassert`s if we can make it correct. - Optional known_isa; + std::optional known_isa; jl_value_t *intersected_type = type; if (x.constant) known_isa = jl_isa(x.constant, type); @@ -3786,11 +3730,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig for (auto *User : Val->users()) { if (isa(User)) { GetElementPtrInst *Inst = cast(User); - #if JL_LLVM_VERSION >= 170000 Inst->mutateType(PointerType::get(Inst->getType(), ToAS)); - #else - Inst->mutateType(PointerType::getWithSamePointeeType(cast(Inst->getType()), ToAS)); - #endif recursively_adjust_ptr_type(Inst, FromAS, ToAS); } else if (isa(User)) { @@ -3799,11 +3739,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig } else if (isa(User)) { BitCastInst *Inst = cast(User); - #if JL_LLVM_VERSION >= 170000 Inst->mutateType(PointerType::get(Inst->getType(), ToAS)); - #else - Inst->mutateType(PointerType::getWithSamePointeeType(cast(Inst->getType()), ToAS)); - #endif recursively_adjust_ptr_type(Inst, FromAS, ToAS); } } diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index cac89a6761d01..40093ca15859b 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -847,7 +847,8 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.ends_with_insensitive("jl_vararg_t") || Name.ends_with_insensitive("jl_opaque_closure_t") || Name.ends_with_insensitive("jl_globalref_t") || - // Probably not technically true for these, but let's allow it + // Probably not technically true for these, but let's allow it as a root + Name.ends_with_insensitive("jl_ircode_state") || Name.ends_with_insensitive("typemap_intersection_env") || Name.ends_with_insensitive("interpreter_state") || Name.ends_with_insensitive("jl_typeenv_t") || diff --git a/src/codegen.cpp b/src/codegen.cpp index 0b483696567ad..b9d42b9b5af16 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -23,11 +23,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include @@ -81,6 +77,10 @@ #include #include +#ifdef USE_ITTAPI +#include "ittapi/ittnotify.h" +#endif + using namespace llvm; static bool jl_fpo_disabled(const Triple &TT) { @@ -633,11 +633,7 @@ static AttributeList get_func_attrs(LLVMContext &C) static AttributeList get_donotdelete_func_attrs(LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, @@ -667,11 +663,7 @@ static AttributeList get_attrs_box_float(LLVMContext &C, unsigned nbytes) auto FnAttrs = AttrBuilder(C); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif auto RetAttrs = AttrBuilder(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addDereferenceableAttr(nbytes); @@ -687,11 +679,7 @@ static AttributeList get_attrs_box_sext(LLVMContext &C, unsigned nbytes) auto FnAttrs = AttrBuilder(C); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif auto RetAttrs = AttrBuilder(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addAttribute(Attribute::getWithDereferenceableBytes(C, nbytes)); @@ -708,11 +696,7 @@ static AttributeList get_attrs_box_zext(LLVMContext &C, unsigned nbytes) auto FnAttrs = AttrBuilder(C); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif auto RetAttrs = AttrBuilder(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addDereferenceableAttr(nbytes); @@ -1137,12 +1121,7 @@ static const auto jlegalx_func = new JuliaFunction{ return FunctionType::get(getInt32Ty(C), {T, T, T_size}, false); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - FnAttrs.addAttribute(Attribute::ReadOnly); - FnAttrs.addAttribute(Attribute::ArgMemOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), @@ -1162,9 +1141,7 @@ static const auto jl_alloc_obj_func = new JuliaFunction{ auto FnAttrs = AttrBuilder(C); FnAttrs.addAllocSizeAttr(1, None); // returns %1 bytes FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); -#endif FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); auto RetAttrs = AttrBuilder(C); @@ -1200,11 +1177,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{ }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::none()); -#else - FnAttrs.addAttribute(Attribute::ReadNone); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); FnAttrs.addAttribute(Attribute::NoRecurse); return AttributeList::get(C, @@ -1219,11 +1192,7 @@ static const auto jl_write_barrier_func = new JuliaFunction<>{ {JuliaType::get_prjlvalue_ty(C)}, true); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); FnAttrs.addAttribute(Attribute::NoRecurse); return AttributeList::get(C, @@ -1296,12 +1265,7 @@ static const auto memcmp_func = new JuliaFunction{ {getPointerTy(C), getPointerTy(C), T_size}, false); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref)); -#else - FnAttrs.addAttribute(Attribute::ArgMemOnly); - FnAttrs.addAttribute(Attribute::ReadOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), @@ -1351,11 +1315,7 @@ static const auto jlfieldindex_func = new JuliaFunction<>{ }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::readOnly()); -#else - FnAttrs.addAttribute(Attribute::ReadOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); FnAttrs.addAttribute(Attribute::WillReturn); return AttributeList::get(C, @@ -1421,9 +1381,7 @@ static const auto jl_allocgenericmemory = new JuliaFunction= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef) | MemoryEffects::argMemOnly(ModRefInfo::Ref)); -#endif FnAttrs.addAttribute(Attribute::WillReturn); RetAttrs.addAlignmentAttr(Align(16)); RetAttrs.addAttribute(Attribute::NonNull); @@ -1501,11 +1459,7 @@ static const auto pointer_from_objref_func = new JuliaFunction<>{ {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::none()); -#else - FnAttrs.addAttribute(Attribute::ReadNone); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), @@ -1528,11 +1482,7 @@ static const auto gc_loaded_func = new JuliaFunction<>{ FnAttrs.addAttribute(Attribute::Speculatable); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoRecurse); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::none()); -#else - FnAttrs.addAttribute(Attribute::ReadNone); -#endif AttrBuilder RetAttrs(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addAttribute(Attribute::NoUndef); @@ -2436,11 +2386,7 @@ static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty) auto ptr = I->getOperand(0); if (ty->getPointerAddressSpace() == ptr->getType()->getPointerAddressSpace()) return ptr; - #if JL_LLVM_VERSION >= 170000 else - #else - else if (cast(ty)->hasSameElementTypeAs(cast(ptr->getType()))) - #endif return ctx.builder.CreateAddrSpaceCast(ptr, ty); } ++EmittedIntToPtrs; @@ -10339,10 +10285,6 @@ char jl_using_perf_jitevents = 0; int jl_is_timing_passes = 0; -#if JL_LLVM_VERSION < 170000 -int jl_opaque_ptrs_set = 0; -#endif - extern "C" void jl_init_llvm(void) { jl_page_size = jl_getpagesize(); @@ -10361,12 +10303,8 @@ extern "C" void jl_init_llvm(void) initializeAnalysis(Registry); initializeTransformUtils(Registry); initializeInstCombine(Registry); -#if JL_LLVM_VERSION >= 160000 - // TODO -#else - initializeAggressiveInstCombine(Registry); - initializeInstrumentation(Registry); -#endif + // TODO: initializeAggressiveInstCombine(Registry); + // TODO: initializeInstrumentation(Registry); initializeTarget(Registry); #ifdef USE_POLLY polly::initializePollyPasses(Registry); @@ -10392,17 +10330,6 @@ extern "C" void jl_init_llvm(void) if (clopt && clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "4", 1); - #if JL_LLVM_VERSION < 170000 - // we want the opaque-pointers to be opt-in, per LLVMContext, for this release - // so change the default value back to pre-14.x, without changing the NumOccurrences flag for it - clopt = llvmopts.lookup("opaque-pointers"); - if (clopt && clopt->getNumOccurrences() == 0) { - clopt->addOccurrence(1, clopt->ArgStr, "false", true); - } else { - jl_opaque_ptrs_set = 1; - } - #endif - clopt = llvmopts.lookup("time-passes"); if (clopt && clopt->getNumOccurrences() > 0) jl_is_timing_passes = 1; @@ -10427,8 +10354,16 @@ extern "C" void jl_init_llvm(void) const char *jit_profiling = getenv("ENABLE_JITPROFILING"); #if defined(JL_USE_INTEL_JITEVENTS) - if (jit_profiling && atoi(jit_profiling)) { - jl_using_intel_jitevents = 1; + if (jit_profiling) { + if (atoi(jit_profiling)) { + jl_using_intel_jitevents = 1; + } + } else { +#ifdef USE_ITTAPI + __itt_collection_state state = __itt_get_collection_state(); + jl_using_intel_jitevents = state == __itt_collection_init_successful || + state == __itt_collection_collector_exists; +#endif } #endif diff --git a/src/disasm.cpp b/src/disasm.cpp index b944e48430c29..6a7985bd7ec1b 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -58,11 +58,7 @@ #include "llvm-version.h" // for outputting disassembly -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include @@ -505,7 +501,7 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada auto TSM = std::unique_ptr(unwrap(dump->TSM)); //If TSM is not passed in, then the context MUST be locked externally. //RAII will release the lock - Optional lock; + std::optional lock; if (TSM) { lock.emplace(TSM->getContext().getLock()); } @@ -1107,11 +1103,7 @@ static void jl_dump_asm_internal( const MCOperand &OpI = Inst.getOperand(Op); if (OpI.isImm()) { int64_t imm = OpI.getImm(); - #if JL_LLVM_VERSION >= 170000 if (opinfo.operands()[Op].OperandType == MCOI::OPERAND_PCREL) - #else - if (opinfo.OpInfo[Op].OperandType == MCOI::OPERAND_PCREL) - #endif imm += Fptr + Index; const char *name = DisInfo.lookupSymbolName(imm); if (name) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 72eb17115f4c7..f3793939610b5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -380,7 +380,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT ios_mem(&str_, 0); JL_STREAM* str = (JL_STREAM*)&str_; jl_static_show(str, (jl_value_t*)type); - + node_type = StringRef((const char*)str_.buf, str_.size); name = StringRef((const char*)str_.buf, str_.size); } diff --git a/src/init.c b/src/init.c index 1cd14e8556cc6..7b41e63e98455 100644 --- a/src/init.c +++ b/src/init.c @@ -849,6 +849,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" #endif + if (jl_options.task_metrics == JL_OPTIONS_TASK_METRICS_ON) { + // enable before creating the root task so it gets timings too. + jl_atomic_fetch_add(&jl_task_metrics_enabled, 1); + } // warning: this changes `jl_current_task`, so be careful not to call that from this function jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); #pragma GCC diagnostic pop diff --git a/src/interpreter.c b/src/interpreter.c index 49a3afed14f0c..fa4fba94a60a5 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -137,8 +137,28 @@ static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state argv[i-1] = eval_value(args[i], s); jl_value_t *c = args[0]; assert(jl_is_code_instance(c) || jl_is_method_instance(c)); - jl_method_instance_t *meth = jl_is_method_instance(c) ? (jl_method_instance_t*)c : ((jl_code_instance_t*)c)->def; - jl_value_t *result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, meth); + jl_value_t *result = NULL; + if (jl_is_code_instance(c)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)c; + assert(jl_atomic_load_relaxed(&codeinst->min_world) <= jl_current_task->world_age && + jl_current_task->world_age <= jl_atomic_load_relaxed(&codeinst->max_world)); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + result = invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst); + + } else { + if (codeinst->owner != jl_nothing) { + jl_error("Failed to invoke or compile external codeinst"); + } + result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst->def); + } + } else { + result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, (jl_method_instance_t*)c); + } JL_GC_POP(); return result; } @@ -569,25 +589,11 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, s->locals[n - 1] = rhs; } else { - jl_module_t *modu; - jl_sym_t *sym; - // Plain assignment is allowed to create bindings at - // toplevel and only for the current module - int alloc = toplevel; - if (jl_is_globalref(lhs)) { - modu = jl_globalref_mod(lhs); - sym = jl_globalref_name(lhs); - alloc &= modu == s->module; - } - else { - assert(jl_is_symbol(lhs)); - modu = s->module; - sym = (jl_sym_t*)lhs; - } - JL_GC_PUSH1(&rhs); - jl_binding_t *b = jl_get_binding_wr(modu, sym, alloc); - jl_checked_assignment(b, modu, sym, rhs); - JL_GC_POP(); + // This is an unmodeled error. Our frontend only generates + // legal `=` expressions, but since GlobalRef used to be legal + // here, give a loud error in case any package is modifying + // internals. + jl_error("Invalid IR: Assignment LHS not a Slot"); } } else if (head == jl_leave_sym) { diff --git a/src/ircode.c b/src/ircode.c index bec8d46513eef..9e64e3fe2b574 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -10,19 +10,79 @@ #include "julia_internal.h" #include "serialize.h" -#ifndef _OS_WINDOWS_ -#include -#endif - -#include "valgrind.h" #include "julia_assert.h" #ifdef __cplusplus extern "C" { #endif +#define TAG_SYMBOL 2 +#define TAG_SSAVALUE 3 +#define TAG_DATATYPE 4 +#define TAG_SLOTNUMBER 5 +#define TAG_SVEC 6 +#define TAG_NEARBYSSAVALUE 7 +#define TAG_NULL 8 +#define TAG_EXPR 9 +#define TAG_PHINODE 10 +#define TAG_PHICNODE 11 +#define TAG_LONG_SYMBOL 12 +#define TAG_LONG_SVEC 13 +#define TAG_LONG_EXPR 14 +#define TAG_LONG_PHINODE 15 +#define TAG_LONG_PHICNODE 16 +#define TAG_METHODROOT 17 +#define TAG_EDGE 18 +#define TAG_STRING 19 +#define TAG_SHORT_INT64 20 +//#define TAG_UNUSED 21 +#define TAG_CNULL 22 +#define TAG_ARRAY1D 23 +#define TAG_SINGLETON 24 +#define TAG_MODULE 25 +#define TAG_TVAR 26 +#define TAG_METHOD_INSTANCE 27 +#define TAG_METHOD 28 +#define TAG_CODE_INSTANCE 29 +#define TAG_COMMONSYM 30 +#define TAG_NEARBYGLOBAL 31 +#define TAG_GLOBALREF 32 +#define TAG_CORE 33 +#define TAG_BASE 34 +#define TAG_BITYPENAME 35 +#define TAG_NEARBYMODULE 36 +#define TAG_INT32 37 +#define TAG_INT64 38 +#define TAG_UINT8 39 +#define TAG_VECTORTY 40 +#define TAG_PTRTY 41 +#define TAG_LONG_SSAVALUE 42 +#define TAG_LONG_METHODROOT 43 +#define TAG_LONG_EDGE 44 +#define TAG_SHORTER_INT64 45 +#define TAG_SHORT_INT32 46 +#define TAG_CALL1 47 +#define TAG_CALL2 48 +#define TAG_SHORT_BACKREF 49 +#define TAG_BACKREF 50 +#define TAG_UNIONALL 51 +#define TAG_GOTONODE 52 +#define TAG_QUOTENODE 53 +#define TAG_GENERAL 54 +#define TAG_GOTOIFNOT 55 +#define TAG_RETURNNODE 56 +#define TAG_ARGUMENT 57 +#define TAG_RELOC_METHODROOT 58 +#define TAG_BINDING 59 +#define TAG_MEMORYT 60 +#define TAG_ENTERNODE 61 + +#define LAST_TAG 61 + + typedef struct { ios_t *s; + size_t ssaid; // method we're compressing for jl_method_t *method; jl_svec_t *edges; @@ -38,29 +98,29 @@ static jl_value_t *deser_tag[256]; static htable_t common_symbol_tag; static jl_value_t *deser_symbols[256]; -void *jl_lookup_ser_tag(jl_value_t *v) +static void *jl_lookup_ser_tag(jl_value_t *v) { return ptrhash_get(&ser_tag, v); } -void *jl_lookup_common_symbol(jl_value_t *v) +static void *jl_lookup_common_symbol(jl_value_t *v) { return ptrhash_get(&common_symbol_tag, v); } -jl_value_t *jl_deser_tag(uint8_t tag) +static jl_value_t *jl_deser_tag(uint8_t tag) { return deser_tag[tag]; } -jl_value_t *jl_deser_symbol(uint8_t tag) +static jl_value_t *jl_deser_symbol(uint8_t tag) { return deser_symbols[tag]; } // --- encoding --- -static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED; +static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal); #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0) static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) @@ -69,7 +129,7 @@ static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) s->relocatability = 0; } -static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED +static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) { jl_array_t *rs = s->method->roots; int i, l = jl_array_nrows(rs); @@ -142,7 +202,7 @@ static void jl_encode_as_indexed_root(jl_ircode_state *s, jl_value_t *v) } } -static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, size_t offset, size_t len) JL_GC_DISABLED +static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, size_t offset, size_t len) { jl_datatype_t *t = (jl_datatype_t*)jl_typetagof(mem); size_t i; @@ -180,7 +240,7 @@ static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, } } -static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED +static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) { size_t i; @@ -248,6 +308,10 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) jl_encode_value(s, jl_globalref_name(v)); } } + else if (jl_is_ssavalue(v) && s->ssaid - ((jl_ssavalue_t*)v)->id < 256) { + write_uint8(s->s, TAG_NEARBYSSAVALUE); + write_uint8(s->s, s->ssaid - ((jl_ssavalue_t*)v)->id); + } else if (jl_is_ssavalue(v) && ((jl_ssavalue_t*)v)->id < 256 && ((jl_ssavalue_t*)v)->id >= 0) { write_uint8(s->s, TAG_SSAVALUE); write_uint8(s->s, ((jl_ssavalue_t*)v)->id); @@ -306,8 +370,11 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } for (i = 0; i < l; i++) { int32_t e = jl_array_data(edges, int32_t)[i]; - if (e <= 20) - jl_encode_value(s, jl_box_int32(e)); + if (e <= 0 && e <= 20) { // 1-byte encodings + jl_value_t *ebox = jl_box_int32(e); + JL_GC_PROMISE_ROOTED(ebox); + jl_encode_value(s, ebox); + } else jl_encode_int32(s, e); } @@ -333,25 +400,39 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } else if (jl_is_gotonode(v)) { write_uint8(s->s, TAG_GOTONODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_gotoifnot(v)) { write_uint8(s->s, TAG_GOTOIFNOT); - jl_encode_value(s, jl_get_nth_field(v, 0)); - jl_encode_value(s, jl_get_nth_field(v, 1)); + jl_value_t *f = jl_get_nth_field_noalloc(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + f = jl_get_nth_field(v, 1); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_enternode(v)) { write_uint8(s->s, TAG_ENTERNODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); - jl_encode_value(s, jl_get_nth_field(v, 1)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + f = jl_get_nth_field_noalloc(v, 1); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_argument(v)) { write_uint8(s->s, TAG_ARGUMENT); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_returnnode(v)) { write_uint8(s->s, TAG_RETURNNODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_encode_value(s, jl_returnnode_value(v)); } else if (jl_is_quotenode(v)) { write_uint8(s->s, TAG_QUOTENODE); @@ -394,19 +475,15 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) write_int32(s->s, jl_string_len(v)); ios_write(s->s, jl_string_data(v), jl_string_len(v)); } - else if (as_literal && jl_is_array(v)) { + else if (as_literal && jl_is_array(v) && jl_array_ndims(v)) { jl_array_t *ar = (jl_array_t*)v; - if (jl_array_ndims(ar) == 1) { - write_uint8(s->s, TAG_ARRAY1D); - } - else { - write_uint8(s->s, TAG_ARRAY); - write_uint16(s->s, jl_array_ndims(ar)); - } - for (i = 0; i < jl_array_ndims(ar); i++) - jl_encode_value(s, jl_box_long(jl_array_dim(ar, i))); + write_uint8(s->s, TAG_ARRAY1D); + size_t l = jl_array_dim0(ar); + jl_value_t *lbox = jl_box_long(l); + JL_GC_PUSH1(&lbox); + jl_encode_value(s, lbox); + JL_GC_POP(); jl_encode_value(s, jl_typeof(ar)); - size_t l = jl_array_len(ar); const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ar->ref.mem))->layout; size_t offset; if (layout->flags.arrayelem_isunion || layout->size == 0) @@ -419,7 +496,10 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) jl_genericmemory_t* m = (jl_genericmemory_t*)v; write_uint8(s->s, TAG_MEMORYT); jl_encode_value(s, (jl_datatype_t*)jl_typetagof(v)); - jl_encode_value(s, jl_box_long(m->length)); + jl_value_t *lbox = jl_box_long(m->length); + JL_GC_PUSH1(&lbox); + jl_encode_value(s, lbox); + JL_GC_POP(); jl_encode_memory_slice(s, m, 0, m->length); } else if (as_literal && jl_is_layout_opaque(((jl_datatype_t*)jl_typeof(v))->layout)) { @@ -428,16 +508,8 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) else if (as_literal || jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_linenode(v) || jl_is_upsilonnode(v) || jl_is_pinode(v) || jl_is_slotnumber(v) || jl_is_ssavalue(v) || (jl_isbits(jl_typeof(v)) && jl_datatype_size(jl_typeof(v)) <= 64)) { + write_uint8(s->s, TAG_GENERAL); jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - size_t tsz = jl_datatype_size(t); - if (tsz <= 255) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, tsz); - } - else { - write_uint8(s->s, TAG_GENERAL); - write_int32(s->s, tsz); - } jl_encode_value(s, t); char *data = (char*)jl_data_ptr(v); @@ -477,7 +549,8 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t has_fcall, uint8_t nospecializeinfer, uint8_t isva, - uint8_t inlining, uint8_t constprop, uint8_t nargsmatchesmethod) + uint8_t inlining, uint8_t constprop, uint8_t nargsmatchesmethod, + jl_array_t *ssaflags) { jl_code_info_flags_t flags; flags.bits.propagate_inbounds = propagate_inbounds; @@ -487,39 +560,45 @@ static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t flags.bits.inlining = inlining; flags.bits.constprop = constprop; flags.bits.nargsmatchesmethod = nargsmatchesmethod; + flags.bits.has_ssaflags = 0; + const uint32_t *ssaflag_data = jl_array_data(ssaflags, uint32_t); + for (size_t i = 0, l = jl_array_dim0(ssaflags); i < l; i++) + if (ssaflag_data[i]) + flags.bits.has_ssaflags = 1; return flags; } // --- decoding --- -static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED; +static jl_value_t *jl_decode_value(jl_ircode_state *s); -static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) { size_t i, len; if (tag == TAG_SVEC) len = read_uint8(s->s); else len = read_int32(s->s); - jl_svec_t *sv = jl_alloc_svec_uninit(len); - jl_value_t **data = jl_svec_data(sv); - for (i = 0; i < len; i++) { - data[i] = jl_decode_value(s); - } + jl_svec_t *sv = jl_alloc_svec(len); + JL_GC_PUSH1(&sv); + for (i = 0; i < len; i++) + jl_svecset(sv, i, jl_decode_value(s)); + JL_GC_POP(); return (jl_value_t*)sv; } -static jl_value_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, size_t nel) JL_GC_DISABLED +static jl_genericmemory_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, size_t nel) { jl_genericmemory_t *m = jl_alloc_genericmemory(mty, nel); + JL_GC_PUSH1(&m); const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty)->layout; if (layout->flags.arrayelem_isboxed) { jl_value_t **data = (jl_value_t**)m->ptr; size_t i, numel = m->length; for (i = 0; i < numel; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(m, data[i]); } - assert(jl_astaggedvalue(m)->bits.gc == GC_CLEAN); // gc is disabled } else if (layout->first_ptr >= 0) { size_t i, numel = m->length; @@ -534,49 +613,48 @@ static jl_value_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, s if ((char*)fld != start) ios_readall(s->s, start, (const char*)fld - start); *fld = jl_decode_value(s); + jl_gc_wb(m, fld); start = (char*)&fld[1]; } data += elsz; if (data != start) ios_readall(s->s, start, data - start); } - assert(jl_astaggedvalue(m)->bits.gc == GC_CLEAN); // gc is disabled } else { size_t extra = jl_genericmemory_isbitsunion(m) ? m->length : 0; size_t tot = m->length * layout->size + extra; ios_readall(s->s, (char*)m->ptr, tot); } - return (jl_value_t*)m; + JL_GC_POP(); + return m; } JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); -static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_array1d(jl_ircode_state *s, uint8_t tag) { - int16_t i, ndims; - if (tag == TAG_ARRAY1D) - ndims = 1; - else - ndims = read_uint16(s->s); - size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); - size_t len = 1; - for (i = 0; i < ndims; i++) { - dims[i] = jl_unbox_long(jl_decode_value(s)); - len *= dims[i]; - } + int16_t ndims = 1; + size_t dim0 = jl_unbox_long(jl_decode_value(s)); + size_t len = dim0; jl_value_t *aty = jl_decode_value(s); - jl_array_t *a = jl_alloc_array_nd(aty, dims, ndims); - a->ref.mem = (jl_genericmemory_t*)jl_decode_value_memory(s, jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)aty, 0), 1), len); + JL_GC_PROMISE_ROOTED(aty); // (JL_ALWAYS_LEAFTYPE) + jl_genericmemory_t *mem = jl_decode_value_memory(s, jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)aty, 0), 1), len); + JL_GC_PUSH1(&mem); + int tsz = sizeof(jl_array_t) + ndims*sizeof(size_t); + jl_array_t *a = (jl_array_t*)jl_gc_alloc(s->ptls, tsz, aty); + a->ref.mem = mem; const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; if (layout->flags.arrayelem_isunion || layout->size == 0) a->ref.ptr_or_offset = (void*)0; else a->ref.ptr_or_offset = a->ref.mem->ptr; + a->dimsize[0] = dim0; + JL_GC_POP(); return (jl_value_t*)a; } -static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) { size_t i, len; jl_sym_t *head = NULL; @@ -597,14 +675,18 @@ static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_D if (head == NULL) head = (jl_sym_t*)jl_decode_value(s); jl_expr_t *e = jl_exprn(head, len); + JL_GC_PUSH1(&e); jl_value_t **data = jl_array_ptr_data(e->args); + jl_value_t *owner = jl_array_owner(e->args); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(owner, data[i]); } + JL_GC_POP(); return (jl_value_t*)e; } -static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) { size_t i, len_e, len_v; if (tag == TAG_PHINODE) { @@ -614,9 +696,13 @@ static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DI len_e = read_int32(s->s); len_v = read_int32(s->s); } - jl_array_t *e = jl_alloc_array_1d(jl_array_int32_type, len_e); - jl_array_t *v = jl_alloc_vec_any(len_v); - jl_value_t *phi = jl_new_struct(jl_phinode_type, e, v); + jl_array_t *e = NULL; + jl_array_t *v = NULL; + jl_value_t *phi = NULL; + JL_GC_PUSH3(&e, &v, &phi); + e = jl_alloc_array_1d(jl_array_int32_type, len_e); + v = jl_alloc_vec_any(len_v); + phi = jl_new_struct(jl_phinode_type, e, v); int32_t *data_e = jl_array_data(e, int32_t); for (i = 0; i < len_e; i++) { data_e[i] = jl_unbox_int32(jl_decode_value(s)); @@ -624,11 +710,13 @@ static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DI jl_value_t **data_v = jl_array_ptr_data(v); for (i = 0; i < len_v; i++) { data_v[i] = jl_decode_value(s); + jl_gc_wb(jl_array_owner(v), data_v[i]); } + JL_GC_POP(); return phi; } -static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) { size_t i, len; if (tag == TAG_PHICNODE) @@ -636,41 +724,53 @@ static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_D else len = read_int32(s->s); jl_array_t *v = jl_alloc_vec_any(len); - jl_value_t *phic = jl_new_struct(jl_phicnode_type, v); + jl_value_t *phic = (jl_value_t*)v; + JL_GC_PUSH1(&phic); + phic = jl_new_struct(jl_phicnode_type, v); jl_value_t **data = jl_array_ptr_data(v); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(jl_array_owner(v), data[i]); } + JL_GC_POP(); return phic; } -static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) JL_GC_DISABLED +static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) { - jl_value_t *mod = jl_decode_value(s); - jl_value_t *var = jl_decode_value(s); - return jl_module_globalref((jl_module_t*)mod, (jl_sym_t*)var); + jl_module_t *mod = (jl_module_t*)jl_decode_value(s); + JL_GC_PROMISE_ROOTED(mod); + jl_sym_t *var = (jl_sym_t*)jl_decode_value(s); + JL_GC_PROMISE_ROOTED(var); + return jl_module_globalref(mod, var); } -static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_any(jl_ircode_state *s) { - int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); - jl_set_typeof(v, (void*)(intptr_t)0xf50); jl_datatype_t *dt = (jl_datatype_t*)jl_decode_value(s); - if (dt->smalltag) + JL_GC_PROMISE_ROOTED(dt); // (JL_ALWAYS_LEAFTYPE) + // jl_new_struct_uninit + size_t sz = jl_datatype_size(dt); + jl_value_t *v = jl_gc_alloc(s->ptls, sz, dt); + if (dt->smalltag) // TODO: do we need this? jl_set_typetagof(v, dt->smalltag, 0); - else - jl_set_typeof(v, dt); char *data = (char*)jl_data_ptr(v); size_t i, np = dt->layout->npointers; char *start = data; - for (i = 0; i < np; i++) { - uint32_t ptr = jl_ptr_offset(dt, i); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_decode_value(s); - start = (char*)&fld[1]; + if (np) { + if (sz > 0) + memset(v, 0, sz); + JL_GC_PUSH1(&v); + for (i = 0; i < np; i++) { + uint32_t ptr = jl_ptr_offset(dt, i); + jl_value_t **fld = &((jl_value_t**)data)[ptr]; + if ((char*)fld != start) + ios_readall(s->s, start, (const char*)fld - start); + *fld = jl_decode_value(s); + jl_gc_wb(v, *fld); + start = (char*)&fld[1]; + } + JL_GC_POP(); } data += jl_datatype_size(dt); if (data != start) @@ -678,7 +778,7 @@ static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DI return v; } -static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED +static jl_value_t *jl_decode_value(jl_ircode_state *s) { assert(!ios_eof(s->s)); jl_value_t *v; @@ -718,16 +818,21 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_SSAVALUE: v = jl_box_ssavalue(read_uint8(s->s)); return v; + case TAG_NEARBYSSAVALUE: + v = jl_box_ssavalue(s->ssaid - read_uint8(s->s)); + return v; case TAG_LONG_SSAVALUE: v = jl_box_ssavalue(read_uint16(s->s)); return v; case TAG_SLOTNUMBER: v = jl_box_slotnumber(read_uint16(s->s)); return v; - case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D: - return jl_decode_value_array(s, tag); + case TAG_ARRAY1D: + return jl_decode_value_array1d(s, tag); case TAG_MEMORYT: - return jl_decode_value_memory(s, jl_decode_value(s), jl_unbox_long(jl_decode_value(s))); + v = jl_decode_value(s); + JL_GC_PROMISE_ROOTED(v); // (JL_ALWAYS_LEAFTYPE) + return (jl_value_t*)jl_decode_value_memory(s, v, jl_unbox_long(jl_decode_value(s))); case TAG_EXPR: JL_FALLTHROUGH; case TAG_LONG_EXPR: JL_FALLTHROUGH; case TAG_CALL1: JL_FALLTHROUGH; @@ -738,27 +843,47 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_PHICNODE: JL_FALLTHROUGH; case TAG_LONG_PHICNODE: return jl_decode_value_phic(s, tag); case TAG_GOTONODE: JL_FALLTHROUGH; case TAG_QUOTENODE: + { v = jl_new_struct_uninit(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type); + JL_GC_PUSH1(&v); set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_GOTOIFNOT: + { v = jl_new_struct_uninit(jl_gotoifnot_type); + JL_GC_PUSH1(&v); set_nth_field(jl_gotoifnot_type, v, 0, jl_decode_value(s), 0); set_nth_field(jl_gotoifnot_type, v, 1, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_ENTERNODE: + { v = jl_new_struct_uninit(jl_enternode_type); + JL_GC_PUSH1(&v); set_nth_field(jl_enternode_type, v, 0, jl_decode_value(s), 0); set_nth_field(jl_enternode_type, v, 1, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_ARGUMENT: + { v = jl_new_struct_uninit(jl_argument_type); + JL_GC_PUSH1(&v); set_nth_field(jl_argument_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_RETURNNODE: + { v = jl_new_struct_uninit(jl_returnnode_type); + JL_GC_PUSH1(&v); set_nth_field(jl_returnnode_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_SHORTER_INT64: v = jl_box_int64((int16_t)read_uint16(s->s)); return v; @@ -777,9 +902,14 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_UINT8: return jl_box_uint8(read_uint8(s->s)); case TAG_NEARBYGLOBAL: - assert(s->method != NULL); + { + jl_method_t *m = s->method; + assert(m != NULL); + JL_GC_PROMISE_ROOTED(m); v = jl_decode_value(s); - return jl_module_globalref(s->method->module, (jl_sym_t*)v); + JL_GC_PROMISE_ROOTED(v); // symbol + return jl_module_globalref(m->module, (jl_sym_t*)v); + } case TAG_NEARBYMODULE: assert(s->method != NULL); return (jl_value_t*)s->method->module; @@ -792,19 +922,29 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_BASE: return (jl_value_t*)jl_base_module; case TAG_VECTORTY: + { v = jl_decode_value(s); - return jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1)); + JL_GC_PUSH1(&v); + v = jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1)); + JL_GC_POP(); + return v; + } case TAG_PTRTY: + { v = jl_decode_value(s); - return jl_apply_type1((jl_value_t*)jl_pointer_type, v); + JL_GC_PUSH1(&v); + v = jl_apply_type1((jl_value_t*)jl_pointer_type, v); + JL_GC_POP(); + return v; + } case TAG_STRING: n = read_int32(s->s); v = jl_alloc_string(n); ios_readall(s->s, jl_string_data(v), n); return v; default: - assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL); - return jl_decode_value_any(s, tag); + assert(tag == TAG_GENERAL); + return jl_decode_value_any(s); } } @@ -880,8 +1020,6 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0); ios_t dest; ios_mem(&dest, 0); - int en = jl_gc_enable(0); // Might GC - size_t i; if (m->roots == NULL) { m->roots = jl_alloc_vec_any(0); @@ -890,6 +1028,7 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) jl_value_t *edges = code->edges; jl_ircode_state s = { &dest, + 0, m, (!isdef && jl_is_svec(edges)) ? (jl_svec_t*)edges : jl_emptysvec, jl_current_task->ptls, @@ -900,7 +1039,8 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) jl_code_info_flags_t flags = code_info_flags(code->propagate_inbounds, code->has_fcall, code->nospecializeinfer, code->isva, code->inlining, code->constprop, - nargsmatchesmethod); + nargsmatchesmethod, + code->ssaflags); write_uint16(s.s, checked_size(flags.packed, IR_DATASIZE_FLAGS)); write_uint16(s.s, checked_size(code->purity.bits, IR_DATASIZE_PURITY)); write_uint16(s.s, checked_size(code->inlining_cost, IR_DATASIZE_INLINING_COST)); @@ -919,38 +1059,45 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) write_int32(s.s, (int32_t)nargs); } - for (i = 0; i < 5; i++) { - int copy = 1; - if (i == 1) { // skip debuginfo - assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, debuginfo)); - continue; - } - jl_encode_value_(&s, jl_get_nth_field((jl_value_t*)code, i), copy); + size_t i, l = jl_array_dim0(code->code); + write_uint64(s.s, l); + for (i = 0; i < l; i++) { + s.ssaid = i; + jl_encode_value(&s, jl_array_ptr_ref(code->code, i)); } + s.ssaid = 0; + jl_encode_value_(&s, (jl_value_t*)code->ssavaluetypes, 1); + assert(jl_typetagis(code->ssaflags, jl_array_uint32_type)); + assert(jl_array_dim0(code->ssaflags) == l); + const uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t); + if (flags.bits.has_ssaflags) + ios_write(s.s, (const char*)ssaflags_data, l * sizeof(*ssaflags_data)); // For opaque closure, also save the slottypes. We technically only need the first slot type, // but this is simpler for now. We may want to refactor where this gets stored in the future. if (m->is_for_opaque_closure) jl_encode_value_(&s, code->slottypes, 1); + jl_string_t *v = NULL; + JL_GC_PUSH1(&v); // Slotnames. For regular methods, we require that m->slot_syms matches the // CodeInfo's slotnames, so we do not need to save it here. - if (m->generator) + if (m->generator) { // can't optimize generated functions - jl_encode_value_(&s, (jl_value_t*)jl_compress_argnames(code->slotnames), 1); - else + v = jl_compress_argnames(code->slotnames); + jl_encode_value_(&s, (jl_value_t*)v, 1); + } + else { jl_encode_value(&s, jl_nothing); + } write_uint8(s.s, s.relocatability); ios_flush(s.s); - jl_string_t *v = jl_pchar_to_string(s.s->buf, s.s->size); + v = jl_pchar_to_string(s.s->buf, s.s->size); ios_close(s.s); - if (jl_array_nrows(m->roots) == 0) { + if (jl_array_nrows(m->roots) == 0) m->roots = NULL; - } - JL_GC_PUSH1(&v); - jl_gc_enable(en); JL_UNLOCK(&m->writelock); // Might GC JL_GC_POP(); @@ -965,21 +1112,22 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t JL_LOCK(&m->writelock); // protect the roots array (Might GC) assert(jl_is_method(m)); assert(jl_is_string(data)); - size_t i; ios_t src; ios_mem(&src, 0); ios_setbuf(&src, (char*)jl_string_data(data), jl_string_len(data), 0); src.size = jl_string_len(data); - int en = jl_gc_enable(0); // Might GC jl_ircode_state s = { &src, + 0, m, metadata == NULL ? NULL : jl_atomic_load_relaxed(&metadata->edges), jl_current_task->ptls, 1 }; - jl_code_info_t *code = jl_new_code_info_uninit(); + jl_value_t *slotnames = NULL; + JL_GC_PUSH2(&code, &slotnames); + jl_code_info_flags_t flags; flags.packed = read_uint16(s.s); code->inlining = flags.bits.inlining; @@ -991,9 +1139,9 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->purity.bits = read_uint16(s.s); code->inlining_cost = read_uint16(s.s); - size_t nslots = read_int32(s.s); code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots); + jl_gc_wb(code, code->slotflags); ios_readall(s.s, jl_array_data(code->slotflags, char), nslots); if (flags.bits.nargsmatchesmethod) { @@ -1002,25 +1150,40 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->nargs = read_int32(s.s); } - for (i = 0; i < 5; i++) { - if (i == 1) // skip debuginfo - continue; - assert(jl_field_isptr(jl_code_info_type, i)); - jl_value_t **fld = (jl_value_t**)((char*)jl_data_ptr(code) + jl_field_offset(jl_code_info_type, i)); - *fld = jl_decode_value(&s); - } - if (m->is_for_opaque_closure) + size_t i, l = read_uint64(s.s); + code->code = jl_alloc_array_1d(jl_array_any_type, l); + jl_gc_wb(code, code->code); + for (i = 0; i < l; i++) { + s.ssaid = i; + jl_array_ptr_set(code->code, i, jl_decode_value(&s)); + } + s.ssaid = 0; + code->ssavaluetypes = jl_decode_value(&s); + jl_gc_wb(code, code->ssavaluetypes); + code->ssaflags = jl_alloc_array_1d(jl_array_uint32_type, l); + jl_gc_wb(code, code->ssaflags); + uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t); + if (flags.bits.has_ssaflags) + ios_readall(s.s, (char*)ssaflags_data, l * sizeof(*ssaflags_data)); + else + memset(ssaflags_data, 0, l * sizeof(*ssaflags_data)); + + if (m->is_for_opaque_closure) { code->slottypes = jl_decode_value(&s); + jl_gc_wb(code, code->slottypes); + } - jl_value_t *slotnames = jl_decode_value(&s); + slotnames = jl_decode_value(&s); if (!jl_is_string(slotnames)) slotnames = m->slot_syms; code->slotnames = jl_uncompress_argnames(slotnames); + jl_gc_wb(code, code->slotnames); if (metadata) code->debuginfo = jl_atomic_load_relaxed(&metadata->debuginfo); else code->debuginfo = m->debuginfo; + jl_gc_wb(code, code->debuginfo); assert(code->debuginfo); assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0); @@ -1029,10 +1192,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t assert(ios_getc(s.s) == -1); ios_close(s.s); - JL_GC_PUSH1(&code); - jl_gc_enable(en); JL_UNLOCK(&m->writelock); // Might GC - JL_GC_POP(); if (metadata) { code->parent = metadata->def; jl_gc_wb(code, code->parent); @@ -1043,6 +1203,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->edges = (jl_value_t*)s.edges; jl_gc_wb(code, s.edges); } + JL_GC_POP(); return code; } @@ -1470,7 +1631,7 @@ void jl_init_serializer(void) deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; - deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; + deser_tag[TAG_ARRAY1D] = (jl_value_t*)jl_array_type; deser_tag[TAG_MEMORYT] = (jl_value_t*)jl_genericmemory_type; deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 42ddfb688af39..80867daade267 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -37,11 +37,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include @@ -1103,22 +1099,13 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override -#else - Error notifyRemovingResources(orc::ResourceKey K) override -#endif { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, orc::ResourceKey SrcKey) override {} -#else - void notifyTransferringResources(orc::ResourceKey DstKey, - orc::ResourceKey SrcKey) override {} -#endif void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &, jitlink::PassConfiguration &PassConfig) override @@ -1168,21 +1155,12 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { Error notifyFailed(orc::MaterializationResponsibility &MR) override { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override -#else - Error notifyRemovingResources(orc::ResourceKey K) override -#endif { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, orc::ResourceKey SrcKey) override {} -#else - void notifyTransferringResources(orc::ResourceKey DstKey, - orc::ResourceKey SrcKey) override {} -#endif void modifyPassConfig(orc::MaterializationResponsibility &, jitlink::LinkGraph &, @@ -1199,11 +1177,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { for (auto block : section.blocks()) { secsize += block->getSize(); } -#if JL_LLVM_VERSION >= 160000 if ((section.getMemProt() & orc::MemProt::Exec) == orc::MemProt::None) { -#else - if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) { -#endif data_size += secsize; } else { code_size += secsize; @@ -1235,11 +1209,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT { -#if JL_LLVM_VERSION < 160000 - return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); -#else return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper(/*Reservation Granularity*/ 16 * 1024 * 1024)); -#endif } #ifdef _COMPILER_CLANG_ @@ -1288,21 +1258,11 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { bool IsReadOnly) override { return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, IsReadOnly); } -#if JL_LLVM_VERSION >= 160000 virtual void reserveAllocationSpace(uintptr_t CodeSize, Align CodeAlign, uintptr_t RODataSize, Align RODataAlign, uintptr_t RWDataSize, Align RWDataAlign) override { return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); } -#else - virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, - uintptr_t RODataSize, - uint32_t RODataAlign, - uintptr_t RWDataSize, - uint32_t RWDataAlign) override { - return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); - } -#endif virtual bool needsToReserveAllocationSpace() override { return MemMgr->needsToReserveAllocationSpace(); } @@ -1415,7 +1375,7 @@ namespace { FeaturesStr = Features.getString(); } // Allocate a target... - Optional codemodel = + std::optional codemodel = #ifdef _P64 // Make sure we are using the large code model on 64bit // Let LLVM pick a default suitable for jitting on 32bit @@ -1830,11 +1790,6 @@ struct JuliaOJIT::DLSymOptimizer { if (named) { auto T = GV.getValueType(); assert(T->isPointerTy()); - #if JL_LLVM_VERSION < 170000 - if (!T->isOpaquePointerTy()) { - T = T->getNonOpaquePointerElementType(); - } - #endif init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, GV.getName() + ".jit", init, &M); } GV.setInitializer(init); @@ -1885,11 +1840,6 @@ struct JuliaOJIT::DLSymOptimizer { if (named) { auto T = CI->getType(); assert(T->isPointerTy()); - #if JL_LLVM_VERSION < 170000 - if (!T->isOpaquePointerTy()) { - T = T->getNonOpaquePointerElementType(); - } - #endif init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, CI->getName() + ".jit", init, &M); } // DCE and SimplifyCFG will kill the branching structure around @@ -1933,19 +1883,6 @@ void fixupTM(TargetMachine &TM) { } } -#if JL_LLVM_VERSION < 170000 -extern int jl_opaque_ptrs_set; -void SetOpaquePointer(LLVMContext &ctx) { - if (jl_opaque_ptrs_set) - return; -#ifndef JL_LLVM_OPAQUE_POINTERS - ctx.setOpaquePointers(false); -#else - ctx.setOpaquePointers(true); -#endif -} -#endif - llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { // Mark our address spaces as non-integral auto jl_data_layout = TM.createDataLayout(); @@ -2053,7 +1990,7 @@ JuliaOJIT::JuliaOJIT() orc::SymbolAliasMap jl_crt = { // Float16 conversion routines -#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) && JL_LLVM_VERSION >= 160000 +#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9 { mangle("__gnu_h2f_ieee"), { mangle("julia_half_to_float"), JITSymbolFlags::Exported } }, @@ -2087,7 +2024,6 @@ JuliaOJIT::JuliaOJIT() #ifdef MSAN_EMUTLS_WORKAROUND orc::SymbolMap msan_crt; - #if JL_LLVM_VERSION >= 170000 msan_crt[mangle("__emutls_get_address")] = {ExecutorAddr::fromPtr(msan_workaround::getTLSAddress), JITSymbolFlags::Exported}; msan_crt[mangle("__emutls_v.__msan_param_tls")] = {ExecutorAddr::fromPtr( reinterpret_cast(static_cast(msan_workaround::MSanTLS::param))), JITSymbolFlags::Exported}; @@ -2105,25 +2041,6 @@ JuliaOJIT::JuliaOJIT() reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_overflow_size))), JITSymbolFlags::Exported}; msan_crt[mangle("__emutls_v.__msan_origin_tls")] = {ExecutorAddr::fromPtr( reinterpret_cast(static_cast(msan_workaround::MSanTLS::origin))), JITSymbolFlags::Exported}; - #else - msan_crt[mangle("__emutls_get_address")] = JITEvaluatedSymbol::fromPointer(msan_workaround::getTLSAddress, JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_param_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::param)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::param_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_retval_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_overflow_size)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported); - #endif cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt))); #endif #if JL_LLVM_VERSION < 190000 @@ -2133,11 +2050,7 @@ JuliaOJIT::JuliaOJIT() // hopefully fixed upstream by e7698a13e319a9919af04d3d693a6f6ea7168a44 static int64_t jl___asan_globals_registered; orc::SymbolMap asan_crt; - #if JL_LLVM_VERSION >= 170000 asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&jl___asan_globals_registered), JITSymbolFlags::Common | JITSymbolFlags::Exported}; - #else - asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&jl___asan_globals_registered, JITSymbolFlags::Common | JITSymbolFlags::Exported); - #endif cantFail(JD.define(orc::absoluteSymbols(asan_crt))); #endif #endif @@ -2148,9 +2061,6 @@ JuliaOJIT::~JuliaOJIT() = default; ThreadSafeContext JuliaOJIT::makeContext() { auto ctx = std::make_unique(); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(*ctx); - #endif return orc::ThreadSafeContext(std::move(ctx)); } @@ -2162,11 +2072,7 @@ orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) { - #if JL_LLVM_VERSION >= 170000 cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), {ExecutorAddr::fromPtr((void*)Addr), JITSymbolFlags::Exported}}}))); - #else - cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), JITEvaluatedSymbol::fromPointer((void*)Addr)}}))); - #endif } void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) @@ -2245,7 +2151,6 @@ SmallVector JuliaOJIT::findSymbols(ArrayRef Names) return Addrs; } -#if JL_LLVM_VERSION >= 170000 Expected JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) { orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; @@ -2286,50 +2191,6 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) } return addr->getAddress().getValue(); } -#else -JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) -{ - orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; - ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1); - auto Sym = ES.lookup(SearchOrder, Name); - if (Sym) - return *Sym; - return Sym.takeError(); -} - -JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name) -{ - return findSymbol(getMangledName(Name), true); -} - -Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) -{ - orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; - ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); - auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); - return Sym; -} - -uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name) -{ - auto addr = findSymbol(getMangledName(Name), false); - if (!addr) { - consumeError(addr.takeError()); - return 0; - } - return cantFail(addr).getAddress(); -} - -uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) -{ - auto addr = findSymbol(getMangledName(Name), false); - if (!addr) { - consumeError(addr.takeError()); - return 0; - } - return cantFail(addr).getAddress(); -} -#endif StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) { @@ -2361,13 +2222,8 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl } #ifdef JL_USE_JITLINK -#if JL_LLVM_VERSION >= 170000 #define addAbsoluteToMap(map,name) \ (map[mangle(#name)] = {ExecutorAddr::fromPtr(&name), JITSymbolFlags::Exported | JITSymbolFlags::Callable}, orc::ExecutorAddr::fromPtr(&name)) -#else -#define addAbsoluteToMap(map,name) \ - (map[mangle(#name)] = JITEvaluatedSymbol::fromPointer(&name, JITSymbolFlags::Exported | JITSymbolFlags::Callable), orc::ExecutorAddr::fromPtr(&name)) -#endif void JuliaOJIT::enableJITDebuggingSupport() { @@ -2552,11 +2408,7 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS } // Reparent the global variable: SG.removeFromParent(); - #if JL_LLVM_VERSION >= 170000 dest.insertGlobalVariable(&SG); - #else - dest.getGlobalList().push_back(&SG); - #endif // Comdat is owned by the Module SG.setComdat(nullptr); } @@ -2603,11 +2455,7 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS } } SG.removeFromParent(); - #if JL_LLVM_VERSION >= 170000 dest.insertAlias(&SG); - #else - dest.getAliasList().push_back(&SG); - #endif } // metadata nodes need to be explicitly merged not just copied diff --git a/src/jitlayers.h b/src/jitlayers.h index baba5412226e3..6665be6a33faa 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -363,14 +363,6 @@ class MaxAlignedAllocImpl }; using MaxAlignedAlloc = MaxAlignedAllocImpl<>; -#if JL_LLVM_VERSION < 170000 -typedef JITSymbol JL_JITSymbol; -// The type that is similar to SymbolInfo on LLVM 4.0 is actually -// `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol -// is expected. -typedef JITSymbol JL_SymbolInfo; -#endif - using CompilerResultT = Expected>; using OptimizerResultT = Expected; using SharedBytesT = StringSet::MapEntryTy)>>; @@ -472,7 +464,7 @@ class JuliaOJIT { } private: ResourcePool &pool; - Optional resource; + std::optional resource; }; OwningResource operator*() JL_NOTSAFEPOINT { @@ -558,16 +550,10 @@ class JuliaOJIT { orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; } orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; } - #if JL_LLVM_VERSION >= 170000 Expected findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; Expected findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; SmallVector findSymbols(ArrayRef Names) JL_NOTSAFEPOINT; - #else - JITEvaluatedSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; - JITEvaluatedSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; - Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; - #endif uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; @@ -660,10 +646,6 @@ Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT { } void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT; -#if JL_LLVM_VERSION < 170000 -void SetOpaquePointer(LLVMContext &ctx) JL_NOTSAFEPOINT; -#endif - void optimizeDLSyms(Module &M); // NewPM diff --git a/src/jlapi.c b/src/jlapi.c index a3621385a437e..defb2db6ac911 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -809,6 +809,28 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void) return jl_atomic_load_relaxed(&jl_cumulative_recompile_time); } +/** + * @brief Enable per-task timing. + */ +JL_DLLEXPORT void jl_task_metrics_enable(void) +{ + // Increment the flag to allow reentrant callers. + jl_atomic_fetch_add(&jl_task_metrics_enabled, 1); +} + +/** + * @brief Disable per-task timing. + */ +JL_DLLEXPORT void jl_task_metrics_disable(void) +{ + // Prevent decrementing the counter below zero + uint8_t enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled); + while (enabled > 0) { + if (jl_atomic_cmpswap(&jl_task_metrics_enabled, &enabled, enabled-1)) + break; + } +} + /** * @brief Retrieve floating-point environment constants. * diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index 3d46940d6fcbb..9c69da199c0cd 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -31,7 +31,6 @@ ;; this is overwritten when we run in actual julia (define (defined-julia-global v) #f) -(define (nothrow-julia-global v) #f) ;; parser entry points diff --git a/src/jloptions.c b/src/jloptions.c index f81cf0453db21..c68b5ce193d98 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -152,6 +152,7 @@ JL_DLLEXPORT void jl_init_options(void) 0, // heap-size-hint 0, // trace_compile_timing JL_TRIM_NO, // trim + 0, // task_metrics }; jl_options_initialized = 1; } @@ -316,6 +317,7 @@ static const char opts_hidden[] = " comment if color is not supported\n" " --trace-compile-timing If --trace-compile is enabled show how long each took to\n" " compile in ms\n" + " --task-metrics={yes|no*} Enable collection of per-task timing data.\n" " --image-codegen Force generate code in imaging mode\n" " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" " --trim={no*|safe|unsafe|unsafe-warn}\n" @@ -347,6 +349,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_trace_compile, opt_trace_compile_timing, opt_trace_dispatch, + opt_task_metrics, opt_math_mode, opt_worker, opt_bind_to, @@ -427,6 +430,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "trace-compile", required_argument, 0, opt_trace_compile }, { "trace-compile-timing", no_argument, 0, opt_trace_compile_timing }, { "trace-dispatch", required_argument, 0, opt_trace_dispatch }, + { "task-metrics", required_argument, 0, opt_task_metrics }, { "math-mode", required_argument, 0, opt_math_mode }, { "handle-signals", required_argument, 0, opt_handle_signals }, // hidden command line options @@ -978,6 +982,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) else jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg); break; + case opt_task_metrics: + if (!strcmp(optarg, "no")) + jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_OFF; + else if (!strcmp(optarg, "yes")) + jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON; + else + jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg); + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); diff --git a/src/jloptions.h b/src/jloptions.h index b9910702f3f9b..211122242cbbd 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -65,6 +65,7 @@ typedef struct { uint64_t heap_size_hint; int8_t trace_compile_timing; int8_t trim; + int8_t task_metrics; } jl_options_t; #endif diff --git a/src/jltypes.c b/src/jltypes.c index 6c6325d84a5ff..abd1f62092035 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -3746,7 +3746,7 @@ void jl_init_types(void) JL_GC_DISABLED NULL, jl_any_type, jl_emptysvec, - jl_perm_symsvec(16, + jl_perm_symsvec(27, "next", "queue", "storage", @@ -3754,16 +3754,27 @@ void jl_init_types(void) JL_GC_DISABLED "result", "scope", "code", + "_state", + "sticky", + "priority", + "_isexception", + "pad00", + "pad01", + "pad02", "rngState0", "rngState1", "rngState2", "rngState3", "rngState4", - "_state", - "sticky", - "_isexception", - "priority"), - jl_svec(16, + "metrics_enabled", + "pad10", + "pad11", + "pad12", + "first_enqueued_at", + "last_started_running_at", + "running_time_ns", + "finished_at"), + jl_svec(27, jl_any_type, jl_any_type, jl_any_type, @@ -3771,21 +3782,36 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type, jl_any_type, jl_any_type, + jl_uint8_type, + jl_bool_type, + jl_uint16_type, + jl_bool_type, + jl_uint8_type, + jl_uint8_type, + jl_uint8_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, - jl_uint8_type, jl_bool_type, - jl_bool_type, - jl_uint16_type), + jl_uint8_type, + jl_uint8_type, + jl_uint8_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type), jl_emptysvec, 0, 1, 6); XX(task); jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type); jl_svecset(jl_task_type->types, 0, listt); - const static uint32_t task_atomicfields[1] = {0x00001000}; // Set fields 13 as atomic + // Set field 20 (metrics_enabled) as const + // Set fields 8 (_state) and 24-27 (metric counters) as atomic + const static uint32_t task_constfields[1] = { 0b00000000000010000000000000000000 }; + const static uint32_t task_atomicfields[1] = { 0b00000111100000000000000010000000 }; + jl_task_type->name->constfields = task_constfields; jl_task_type->name->atomicfields = task_atomicfields; tv = jl_svec2(tvar("A"), tvar("R")); diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 72e97da3c2daa..c522a565f462a 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4279,6 +4279,7 @@ f(x) = yt(x) (if (or exists (and short (pair? alldefs))) `(toplevel-butfirst (null) + ,@(map (lambda (v) `(moved-local ,v)) moved-vars) ,@sp-inits ,@mk-method (latestworld)) @@ -4355,7 +4356,6 @@ f(x) = yt(x) (define (valid-ir-argument? e) (or (simple-atom? e) - (and (globalref? e) (nothrow-julia-global (cadr e) (caddr e))) (and (pair? e) (memq (car e) '(quote inert top core slot static_parameter))))) @@ -4606,14 +4606,20 @@ f(x) = yt(x) (cdr cnd) (list cnd)))))) tests)) + (define (emit-assignment-or-setglobal lhs rhs) + (if (globalref? lhs) + (begin + (emit `(global ,lhs)) + (emit `(call (top setglobal!) ,(cadr lhs) (inert ,(caddr lhs)) ,rhs))) + (emit `(= ,lhs ,rhs)))) (define (emit-assignment lhs rhs) (if rhs (if (valid-ir-rvalue? lhs rhs) - (emit `(= ,lhs ,rhs)) + (emit-assignment-or-setglobal lhs rhs) (let ((rr (make-ssavalue))) (emit `(= ,rr ,rhs)) - (emit `(= ,lhs ,rr)))) - (emit `(= ,lhs (null)))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved + (emit-assignment-or-setglobal lhs rr))) + (emit-assignment-or-setglobal lhs `(null))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved #f) ;; the interpreter loop. `break-labels` keeps track of the labels to jump to ;; for all currently closing break-blocks. @@ -4692,7 +4698,7 @@ f(x) = yt(x) rhs (make-ssavalue)))) (if (not (eq? rr rhs)) (emit `(= ,rr ,rhs))) - (emit `(= ,lhs ,rr)) + (emit-assignment-or-setglobal lhs rr) (if tail (emit-return tail rr)) rr) (emit-assignment lhs rhs)))))) diff --git a/src/julia.h b/src/julia.h index 944fd3c43a297..a9864aad16ccc 100644 --- a/src/julia.h +++ b/src/julia.h @@ -279,7 +279,7 @@ typedef union __jl_purity_overrides_t { } _jl_purity_overrides_t; #define NUM_EFFECTS_OVERRIDES 11 -#define NUM_IR_FLAGS 13 +#define NUM_IR_FLAGS 3 // This type describes a single function body typedef struct _jl_code_info_t { @@ -292,15 +292,8 @@ typedef struct _jl_code_info_t { // 1 << 0 = inbounds region // 1 << 1 = callsite inline region // 1 << 2 = callsite noinline region - // 1 << 3 = refined statement - // 1 << 4 = :consistent - // 1 << 5 = :effect_free - // 1 << 6 = :nothrow - // 1 << 7 = :terminates - // 1 << 8 = :noub - // 1 << 9 = :effect_free_if_inaccessiblememonly - // 1 << 10 = :inaccessiblemem_or_argmemonly - // 1 << 11-19 = callsite effects overrides + // 1 << 3-14 = purity + // 1 << 16+ = reserved for inference // miscellaneous data: jl_array_t *slotnames; // names of local variables jl_array_t *slotflags; // local var bit flags @@ -2283,16 +2276,25 @@ typedef struct _jl_task_t { jl_value_t *result; jl_value_t *scope; jl_function_t *start; - // 4 byte padding on 32-bit systems - // uint32_t padding0; - uint64_t rngState[JL_RNG_SIZE]; _Atomic(uint8_t) _state; uint8_t sticky; // record whether this Task can be migrated to a new thread - _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with - // 1 byte padding - // uint8_t padding1; - // multiqueue priority uint16_t priority; + _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with + uint8_t pad0[3]; + // === 64 bytes (cache line) + uint64_t rngState[JL_RNG_SIZE]; + // flag indicating whether or not to record timing metrics for this task + uint8_t metrics_enabled; + uint8_t pad1[3]; + // timestamp this task first entered the run queue + _Atomic(uint64_t) first_enqueued_at; + // timestamp this task was most recently scheduled to run + _Atomic(uint64_t) last_started_running_at; + // time this task has spent running; updated when it yields or finishes. + _Atomic(uint64_t) running_time_ns; + // === 64 bytes (cache line) + // timestamp this task finished (i.e. entered state DONE or FAILED). + _Atomic(uint64_t) finished_at; // hidden state: // cached floating point environment @@ -2619,6 +2621,9 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_TRIM_UNSAFE 2 #define JL_TRIM_UNSAFE_WARN 3 +#define JL_OPTIONS_TASK_METRICS_OFF 0 +#define JL_OPTIONS_TASK_METRICS_ON 1 + // Version information #include // Generated file diff --git a/src/julia_internal.h b/src/julia_internal.h index e081c94329deb..4741316093f95 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -316,6 +316,9 @@ extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled; extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time; extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time; +// Global *atomic* integer controlling *process-wide* task timing. +extern JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled; + #define jl_return_address() ((uintptr_t)__builtin_return_address(0)) STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a) @@ -651,6 +654,7 @@ typedef struct { uint16_t nargsmatchesmethod:1; uint16_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none uint16_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none + uint16_t has_ssaflags:1; } jl_code_info_flags_bitfield_t; typedef union { diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index a99e18f3e3762..d9551e0552f9c 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -1,5 +1,6 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#include #include #include #include @@ -8,30 +9,15 @@ #include #include #include - -#if JL_LLVM_VERSION >= 160000 #include -#endif #include "julia.h" #define STR(csym) #csym #define XSTR(csym) STR(csym) -#if JL_LLVM_VERSION >= 160000 - -#include - -template -using Optional = std::optional; static constexpr std::nullopt_t None = std::nullopt; -#else - -#include - -#endif - enum AddressSpace { Generic = 0, Tracked = 10, @@ -180,7 +166,7 @@ static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instructi using namespace llvm; inst->setMetadata(llvm::LLVMContext::MD_tbaa, md); if (llvm::isa(inst) && md && md == get_tbaa_const(md->getContext())) { - inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), None)); + inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), std::nullopt)); } return inst; } @@ -245,11 +231,7 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s if (!F) { FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_size->getPointerTo()}, false); F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M); -#if JL_LLVM_VERSION >= 160000 F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif } builder.CreateCall(F, {signal_page}); } diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 05d62adc57926..a6e963664b0f3 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -37,7 +37,7 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false"); extern JuliaOJIT *jl_ExecutionEngine; // whether this platform unconditionally (i.e. without needing multiversioning) supports FMA -Optional always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT { +std::optional always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT { if (TT.isAArch64()) { auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp index 8d80f7fd54877..baf844dffa89c 100644 --- a/src/llvm-julia-licm.cpp +++ b/src/llvm-julia-licm.cpp @@ -342,11 +342,7 @@ struct JuliaLICM : public JuliaPassContext { } } if (changed && SE) { -#if JL_LLVM_VERSION >= 160000 SE->forgetLoopDispositions(); -#else - SE->forgetLoopDispositions(L); -#endif } #ifdef JL_VERIFY_PASSES assert(!verifyLLVMIR(*L)); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index d95cc9c49b698..ff2cac6e49406 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1163,9 +1163,6 @@ State LateLowerGCFrame::LocalScan(Function &F) { } if (CI->hasStructRetAttr()) { Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType(); - #if JL_LLVM_VERSION < 170000 - assert(cast(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType())); - #endif auto tracked = CountTrackedPointers(ElT, true); if (tracked.count) { AllocaInst *SRet = dyn_cast((CI->arg_begin()[0])->stripInBoundsOffsets()); @@ -1252,38 +1249,20 @@ State LateLowerGCFrame::LocalScan(Function &F) { callee->getName() == "memcmp") { continue; } -#if JL_LLVM_VERSION >= 160000 if (callee->getMemoryEffects().onlyReadsMemory() || callee->getMemoryEffects().onlyAccessesArgPointees()) { continue; } -#else - if (callee->hasFnAttribute(Attribute::ReadNone) || - callee->hasFnAttribute(Attribute::ReadOnly) || - callee->hasFnAttribute(Attribute::ArgMemOnly)) { - continue; - } -#endif if (MemTransferInst *MI = dyn_cast(CI)) { MaybeTrackDst(S, MI); } } -#if JL_LLVM_VERSION >= 160000 if (isa(CI) || CI->getMemoryEffects().onlyAccessesArgPointees() || CI->getMemoryEffects().onlyReadsMemory()) { // Intrinsics are never safepoints. continue; } -#else - if (isa(CI) || - CI->hasFnAttr(Attribute::ArgMemOnly) || - CI->hasFnAttr(Attribute::ReadNone) || - CI->hasFnAttr(Attribute::ReadOnly)) { - // Intrinsics are never safepoints. - continue; - } -#endif SmallVector CalleeRoots; for (Use &U : CI->args()) { // Find all callee rooted arguments. diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp index e09ea892ee488..c359bf6c117ce 100644 --- a/src/llvm-lower-handlers.cpp +++ b/src/llvm-lower-handlers.cpp @@ -8,11 +8,7 @@ #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 22ef973decfe9..a76d076ebd6f3 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -15,11 +15,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include @@ -51,7 +47,7 @@ using namespace llvm; -extern Optional always_have_fma(Function&, const Triple &TT); +extern std::optional always_have_fma(Function&, const Triple &TT); namespace { constexpr uint32_t clone_mask = @@ -185,7 +181,7 @@ struct TargetSpec { } }; -static Optional> get_target_specs(Module &M) { +static std::optional> get_target_specs(Module &M) { auto md = M.getModuleFlag("julia.mv.specs"); if (!md) return None; diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index 4e9e4826b4f75..ca25251040fb2 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -129,9 +129,7 @@ namespace jl_intrinsics { static Function *addGCAllocAttributes(Function *target) { auto FnAttrs = AttrBuilder(target->getContext()); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); -#endif FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); @@ -228,11 +226,7 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, QUEUE_GC_ROOT_NAME); -#if JL_LLVM_VERSION >= 160000 intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif return intrinsic; }); @@ -248,11 +242,7 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, SAFEPOINT_NAME); -#if JL_LLVM_VERSION >= 160000 intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif return intrinsic; }); } @@ -309,11 +299,7 @@ namespace jl_well_known { false), Function::ExternalLinkage, GC_QUEUE_ROOT_NAME); -#if JL_LLVM_VERSION >= 160000 func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif return func; }); diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp index 4e5a2ee5e0d54..06a52ad3dcb43 100644 --- a/src/llvm-propagate-addrspaces.cpp +++ b/src/llvm-propagate-addrspaces.cpp @@ -163,22 +163,14 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc Instruction *InstV = cast(V); Instruction *NewV = InstV->clone(); ToInsert.push_back(std::make_pair(NewV, InstV)); - #if JL_LLVM_VERSION >= 170000 Type *NewRetTy = PointerType::get(InstV->getType(), allocaAddressSpace); - #else - Type *NewRetTy = PointerType::getWithSamePointeeType(cast(InstV->getType()), allocaAddressSpace); - #endif NewV->mutateType(NewRetTy); LiftingMap[InstV] = NewV; ToRevisit.push_back(NewV); } } auto CollapseCastsAndLift = [&](Value *CurrentV, Instruction *InsertPt) -> Value * { - #if JL_LLVM_VERSION >= 170000 PointerType *TargetType = PointerType::get(CurrentV->getType(), allocaAddressSpace); - #else - PointerType *TargetType = PointerType::getWithSamePointeeType(cast(CurrentV->getType()), allocaAddressSpace); - #endif while (!LiftingMap.count(CurrentV)) { if (isa(CurrentV)) CurrentV = cast(CurrentV)->getOperand(0); @@ -192,17 +184,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc } if (LiftingMap.count(CurrentV)) CurrentV = LiftingMap[CurrentV]; - #if JL_LLVM_VERSION >= 170000 assert(CurrentV->getType() == TargetType); - #else - if (CurrentV->getType() != TargetType) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(CurrentV->getContext().supportsTypedPointers()); - auto *BCI = new BitCastInst(CurrentV, TargetType); - ToInsert.push_back(std::make_pair(BCI, InsertPt)); - CurrentV = BCI; - } - #endif return CurrentV; }; diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 614ed15f840e6..e36136859517a 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -9,11 +9,7 @@ #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include @@ -67,11 +63,7 @@ struct LowerPTLS { void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const { -#if JL_LLVM_VERSION >= 160000 pgcstack->addFnAttr(Attribute::getWithMemoryEffects(pgcstack->getContext(), MemoryEffects::none())); -#else - addFnAttr(pgcstack, Attribute::ReadNone); -#endif addFnAttr(pgcstack, Attribute::NoUnwind); } diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp index 89ae1d292d108..bb492f467e74c 100644 --- a/src/llvm-remove-addrspaces.cpp +++ b/src/llvm-remove-addrspaces.cpp @@ -44,19 +44,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper { DstTy = SrcTy; if (auto Ty = dyn_cast(SrcTy)) { - #if JL_LLVM_VERSION >= 170000 DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace())); - #else - if (Ty->isOpaque()) { - DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace())); - } - else { - //Remove once opaque pointer transition is complete - DstTy = PointerType::get( - remapType(Ty->getNonOpaquePointerElementType()), - ASRemapper(Ty->getAddressSpace())); - } - #endif } else if (auto Ty = dyn_cast(SrcTy)) { SmallVector Params; @@ -157,24 +145,8 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer { Ops.push_back(NewOp ? cast(NewOp) : Op); } - #if JL_LLVM_VERSION >= 170000 if (CE->getOpcode() != Instruction::GetElementPtr) DstV = CE->getWithOperands(Ops, Ty); - #else - if (CE->getOpcode() == Instruction::GetElementPtr) { - // GEP const exprs need to know the type of the source. - // asserts remapType(typeof arg0) == typeof mapValue(arg0). - Constant *Src = CE->getOperand(0); - auto ptrty = cast(Src->getType()->getScalarType()); - //Remove once opaque pointer transition is complete - if (!ptrty->isOpaque()) { - Type *SrcTy = remapType(ptrty->getNonOpaquePointerElementType()); - DstV = CE->getWithOperands(Ops, Ty, false, SrcTy); - } - } - else - DstV = CE->getWithOperands(Ops, Ty); - #endif } } diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index e12b30e3db466..3faa9d9728e67 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -297,11 +297,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution } if (SE) -#if JL_LLVM_VERSION >= 160000 SE->forgetLoopDispositions(); -#else - SE->forgetLoopDispositions(&L); -#endif } #ifdef JL_VERIFY_PASSES diff --git a/src/llvm-version.h b/src/llvm-version.h index 984e918d480cc..061d80deb02f9 100644 --- a/src/llvm-version.h +++ b/src/llvm-version.h @@ -10,12 +10,8 @@ #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \ + LLVM_VERSION_PATCH) -#if JL_LLVM_VERSION < 150000 - #error Only LLVM versions >= 15.0.0 are supported by Julia -#endif - -#if JL_LLVM_VERSION >= 160000 - #define JL_LLVM_OPAQUE_POINTERS 1 +#if JL_LLVM_VERSION < 170000 + #error Only LLVM versions >= 17.0.0 are supported by Julia #endif #if JL_LLVM_VERSION < 19000 && defined(_CPU_RISCV64_) diff --git a/src/pipeline.cpp b/src/pipeline.cpp index f8976099ee53c..8c9054c0d65ff 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -150,15 +150,9 @@ namespace { // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); // MPM.addPass(RequireAnalysisPass()); //Let's assume the defaults are actually fine for our purposes - #if JL_LLVM_VERSION < 160000 - // MPM.addPass(ModuleAddressSanitizerPass( - // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); - MPM.addPass(ModuleAddressSanitizerPass(AddressSanitizerOptions())); - #else // LLVM 16+ // MPM.addPass(AddressSanitizerPass( // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); MPM.addPass(AddressSanitizerPass(AddressSanitizerOptions(), true, false)); - #endif // } }; ASanPass(/*SanitizerKind::Address, */false); @@ -347,12 +341,8 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder FPM.addPass(DCEPass()); FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); if (O.getSpeedupLevel() >= 1) { -#if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); -#else - FPM.addPass(SROAPass()); -#endif } MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } @@ -389,12 +379,8 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, if (O.getSpeedupLevel() >= 1) { FunctionPassManager FPM; if (O.getSpeedupLevel() >= 2) { -#if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); -#else - FPM.addPass(SROAPass()); -#endif // SROA can duplicate PHI nodes which can block LowerSIMD FPM.addPass(InstCombinePass()); FPM.addPass(JumpThreadingPass()); @@ -468,12 +454,8 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder * if (options.enable_scalar_optimizations) { if (O.getSpeedupLevel() >= 2) { JULIA_PASS(FPM.addPass(AllocOptPass())); - #if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); - #else - FPM.addPass(SROAPass()); - #endif FPM.addPass(InstSimplifyPass()); FPM.addPass(GVNPass()); FPM.addPass(MemCpyOptPass()); @@ -737,12 +719,7 @@ void NewPM::run(Module &M) { //We must recreate the analysis managers every time //so that analyses from previous runs of the pass manager //do not hang around for the next run -#if JL_LLVM_VERSION >= 160000 StandardInstrumentations SI(M.getContext(),false); -#else - StandardInstrumentations SI(false); -#endif -#if JL_LLVM_VERSION >= 170000 PassInstrumentationCallbacks PIC; adjustPIC(PIC); TimePasses.registerCallbacks(PIC); @@ -752,17 +729,6 @@ void NewPM::run(Module &M) { ModuleAnalysisManager MAM; SI.registerCallbacks(PIC, &MAM); SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this -#else - FunctionAnalysisManager FAM(createFAM(O, *TM.get())); - PassInstrumentationCallbacks PIC; - adjustPIC(PIC); - TimePasses.registerCallbacks(PIC); - SI.registerCallbacks(PIC, &FAM); - SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this - LoopAnalysisManager LAM; - CGSCCAnalysisManager CGAM; - ModuleAnalysisManager MAM; -#endif PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC); PB.registerLoopAnalyses(LAM); PB.registerFunctionAnalyses(FAM); @@ -794,7 +760,7 @@ OptimizationLevel getOptLevel(int optlevel) { } //This part is also basically stolen from LLVM's PassBuilder.cpp file -static Optional> parseJuliaPipelineOptions(StringRef name) { +static std::optional> parseJuliaPipelineOptions(StringRef name) { if (name.consume_front("julia")) { auto O = OptimizationLevel::O2; auto options = OptimizationOptions::defaults(); diff --git a/src/processor.cpp b/src/processor.cpp index bc12f5b54be19..3edebcc2f3ae6 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -7,11 +7,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include @@ -162,11 +158,7 @@ struct FeatureList { { int cnt = 0; for (size_t i = 0; i < n; i++) - #if JL_LLVM_VERSION >= 170000 cnt += llvm::popcount(eles[i]); - #else - cnt += llvm::countPopulation(eles[i]); - #endif return cnt; } inline bool empty() const diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index 56f8487d8fb73..2a6cb00961594 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -4,11 +4,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include "julia.h" diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 450096eef5b01..80281b733bf44 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -1574,7 +1574,6 @@ bi_iintrinsic_cnvtb_fast(LLVMAShr, ashr_op, ashr_int, , 1) //un_iintrinsic_fast(LLVMByteSwap, bswap_op, bswap_int, u) un_iintrinsic_slow(LLVMByteSwap, bswap_int, u) //#define ctpop_op(a) __builtin_ctpop(a) -#if JL_LLVM_VERSION >= 170000 //uu_iintrinsic_fast(LLVMPopcount, ctpop_op, ctpop_int, u) uu_iintrinsic_slow(LLVMPopcount, ctpop_int, u) //#define ctlz_op(a) __builtin_ctlz(a) @@ -1583,16 +1582,6 @@ uu_iintrinsic_slow(LLVMCountl_zero, ctlz_int, u) //#define cttz_op(a) __builtin_cttz(a) //uu_iintrinsic_fast(LLVMCountr_zero, cttz_op, cttz_int, u) uu_iintrinsic_slow(LLVMCountr_zero, cttz_int, u) -#else -//uu_iintrinsic_fast(LLVMCountPopulation, ctpop_op, ctpop_int, u) -uu_iintrinsic_slow(LLVMCountPopulation, ctpop_int, u) -//#define ctlz_op(a) __builtin_ctlz(a) -//uu_iintrinsic_fast(LLVMCountLeadingZeros, ctlz_op, ctlz_int, u) -uu_iintrinsic_slow(LLVMCountLeadingZeros, ctlz_int, u) -//#define cttz_op(a) __builtin_cttz(a) -//uu_iintrinsic_fast(LLVMCountTrailingZeros, cttz_op, cttz_int, u) -uu_iintrinsic_slow(LLVMCountTrailingZeros, cttz_int, u) -#endif #define not_op(a) ~a un_iintrinsic_fast(LLVMFlipAllBits, not_op, not_int, u) diff --git a/src/serialize.h b/src/serialize.h index 3aa82a1d09a9b..549c1588073ff 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -7,69 +7,6 @@ extern "C" { #endif -#define TAG_SYMBOL 2 -#define TAG_SSAVALUE 3 -#define TAG_DATATYPE 4 -#define TAG_SLOTNUMBER 5 -#define TAG_SVEC 6 -#define TAG_ARRAY 7 -#define TAG_NULL 8 -#define TAG_EXPR 9 -#define TAG_PHINODE 10 -#define TAG_PHICNODE 11 -#define TAG_LONG_SYMBOL 12 -#define TAG_LONG_SVEC 13 -#define TAG_LONG_EXPR 14 -#define TAG_LONG_PHINODE 15 -#define TAG_LONG_PHICNODE 16 -#define TAG_METHODROOT 17 -#define TAG_EDGE 18 -#define TAG_STRING 19 -#define TAG_SHORT_INT64 20 -#define TAG_SHORT_GENERAL 21 -#define TAG_CNULL 22 -#define TAG_ARRAY1D 23 -#define TAG_SINGLETON 24 -#define TAG_MODULE 25 -#define TAG_TVAR 26 -#define TAG_METHOD_INSTANCE 27 -#define TAG_METHOD 28 -#define TAG_CODE_INSTANCE 29 -#define TAG_COMMONSYM 30 -#define TAG_NEARBYGLOBAL 31 -#define TAG_GLOBALREF 32 -#define TAG_CORE 33 -#define TAG_BASE 34 -#define TAG_BITYPENAME 35 -#define TAG_NEARBYMODULE 36 -#define TAG_INT32 37 -#define TAG_INT64 38 -#define TAG_UINT8 39 -#define TAG_VECTORTY 40 -#define TAG_PTRTY 41 -#define TAG_LONG_SSAVALUE 42 -#define TAG_LONG_METHODROOT 43 -#define TAG_LONG_EDGE 44 -#define TAG_SHORTER_INT64 45 -#define TAG_SHORT_INT32 46 -#define TAG_CALL1 47 -#define TAG_CALL2 48 -#define TAG_SHORT_BACKREF 49 -#define TAG_BACKREF 50 -#define TAG_UNIONALL 51 -#define TAG_GOTONODE 52 -#define TAG_QUOTENODE 53 -#define TAG_GENERAL 54 -#define TAG_GOTOIFNOT 55 -#define TAG_RETURNNODE 56 -#define TAG_ARGUMENT 57 -#define TAG_RELOC_METHODROOT 58 -#define TAG_BINDING 59 -#define TAG_MEMORYT 60 -#define TAG_ENTERNODE 61 - -#define LAST_TAG 61 - #define write_uint8(s, n) ios_putc((n), (s)) #define read_uint8(s) ((uint8_t)ios_getc((s))) #define write_int8(s, n) write_uint8((s), (n)) @@ -137,12 +74,6 @@ static inline uint32_t read_uint32(ios_t *s) JL_NOTSAFEPOINT #define read_uint(s) read_uint32(s) #endif - -void *jl_lookup_ser_tag(jl_value_t *v); -void *jl_lookup_common_symbol(jl_value_t *v); -jl_value_t *jl_deser_tag(uint8_t tag); -jl_value_t *jl_deser_symbol(uint8_t tag); - #ifdef __cplusplus } #endif diff --git a/src/support/win32-clang-ABI-bug/optional b/src/support/win32-clang-ABI-bug/optional index a2ecdad1e33ce..fd2f7646e1766 100644 --- a/src/support/win32-clang-ABI-bug/optional +++ b/src/support/win32-clang-ABI-bug/optional @@ -57,7 +57,6 @@ namespace optional_detail { // // The move constructible / assignable conditions emulate the remaining behavior // of std::is_trivially_copyable. -#if JL_LLVM_VERSION >= 170000 template ::value && std::is_trivially_copy_assignable::value && @@ -65,15 +64,6 @@ template ::value) && (std::is_trivially_move_assignable::value || !std::is_move_assignable::value))> -#else -template ::value && - std::is_trivially_copy_assignable::value && - (llvm::is_trivially_move_constructible::value || - !std::is_move_constructible::value) && - (std::is_trivially_move_assignable::value || - !std::is_move_assignable::value))> -#endif class OptionalStorage { union { char empty; diff --git a/src/task.c b/src/task.c index 5e1172a96a409..1a50d6fcbcf65 100644 --- a/src/task.c +++ b/src/task.c @@ -313,6 +313,13 @@ void JL_NORETURN jl_finish_task(jl_task_t *ct) { JL_PROBE_RT_FINISH_TASK(ct); JL_SIGATOMIC_BEGIN(); + if (ct->metrics_enabled) { + // [task] user_time -finished-> wait_time + assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0); + uint64_t now = jl_hrtime(); + jl_atomic_store_relaxed(&ct->finished_at, now); + jl_atomic_fetch_add_relaxed(&ct->running_time_ns, now - jl_atomic_load_relaxed(&ct->last_started_running_at)); + } if (jl_atomic_load_relaxed(&ct->_isexception)) jl_atomic_store_release(&ct->_state, JL_TASK_STATE_FAILED); else @@ -1146,6 +1153,11 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->ptls = NULL; t->world_age = ct->world_age; t->reentrant_timing = 0; + t->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0; + jl_atomic_store_relaxed(&t->first_enqueued_at, 0); + jl_atomic_store_relaxed(&t->last_started_running_at, 0); + jl_atomic_store_relaxed(&t->running_time_ns, 0); + jl_atomic_store_relaxed(&t->finished_at, 0); jl_timing_task_init(t); if (t->ctx.copy_stack) @@ -1245,6 +1257,12 @@ CFI_NORETURN fesetenv(&ct->fenv); ct->ctx.started = 1; + if (ct->metrics_enabled) { + // [task] wait_time -started-> user_time + assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0); + assert(jl_atomic_load_relaxed(&ct->last_started_running_at) == 0); + jl_atomic_store_relaxed(&ct->last_started_running_at, jl_hrtime()); + } JL_PROBE_RT_START_TASK(ct); jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { @@ -1596,6 +1614,19 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->ptls = ptls; ct->world_age = 1; // OK to run Julia code on this task ct->reentrant_timing = 0; + jl_atomic_store_relaxed(&ct->running_time_ns, 0); + jl_atomic_store_relaxed(&ct->finished_at, 0); + ct->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0; + if (ct->metrics_enabled) { + // [task] created -started-> user_time + uint64_t now = jl_hrtime(); + jl_atomic_store_relaxed(&ct->first_enqueued_at, now); + jl_atomic_store_relaxed(&ct->last_started_running_at, now); + } + else { + jl_atomic_store_relaxed(&ct->first_enqueued_at, 0); + jl_atomic_store_relaxed(&ct->last_started_running_at, 0); + } ptls->root_task = ct; jl_atomic_store_relaxed(&ptls->current_task, ct); JL_GC_PROMISE_ROOTED(ct); diff --git a/src/threading.c b/src/threading.c index 8f0dfb3330885..ac9cc276d613a 100644 --- a/src/threading.c +++ b/src/threading.c @@ -49,6 +49,8 @@ JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0; +JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled = 0; + JL_DLLEXPORT void *jl_get_ptls_states(void) { // mostly deprecated: use current_task instead diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 8b40c45c4366f..9ef9ca4b04376 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 7b759d7f0af56c5ad01f2289bbad71284a556970 +PKG_SHA1 = d84a1a38b6466fa7400e9ad2874a0ef963a10456 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl index e881a65ca6b1c..e15807f645119 100644 --- a/stdlib/REPL/src/LineEdit.jl +++ b/stdlib/REPL/src/LineEdit.jl @@ -181,7 +181,18 @@ struct EmptyHistoryProvider <: HistoryProvider end reset_state(::EmptyHistoryProvider) = nothing -complete_line(c::EmptyCompletionProvider, s; hint::Bool=false) = String[], "", true +# Before, completions were always given as strings. But at least for backslash +# completions, it's nice to see what glyphs are available in the completion preview. +# To separate between what's shown in the preview list of possible matches, and what's +# actually completed, we introduce this struct. +struct NamedCompletion + completion::String # what is actually completed, for example "\trianglecdot" + name::String # what is displayed in lists of possible completions, for example "◬ \trianglecdot" +end + +NamedCompletion(completion::String) = NamedCompletion(completion, completion) + +complete_line(c::EmptyCompletionProvider, s; hint::Bool=false) = NamedCompletion[], "", true # complete_line can be specialized for only two arguments, when the active module # doesn't matter (e.g. Pkg does this) @@ -308,6 +319,7 @@ end set_action!(s, command::Symbol) = nothing +common_prefix(completions::Vector{NamedCompletion}) = common_prefix(map(x -> x.completion, completions)) function common_prefix(completions::Vector{String}) ret = "" c1 = completions[1] @@ -330,6 +342,8 @@ end # does not restrict column length when multiple columns are used. const MULTICOLUMN_THRESHOLD = 5 +show_completions(s::PromptState, completions::Vector{NamedCompletion}) = show_completions(s, map(x -> x.name, completions)) + # Show available completions function show_completions(s::PromptState, completions::Vector{String}) # skip any lines of input after the cursor @@ -374,6 +388,18 @@ function complete_line(s::MIState) end end +# due to close coupling of the Pkg ReplExt `complete_line` can still return a vector of strings, +# so we convert those in this helper +function complete_line_named(args...; kwargs...)::Tuple{Vector{NamedCompletion},String,Bool} + result = complete_line(args...; kwargs...)::Union{Tuple{Vector{NamedCompletion},String,Bool},Tuple{Vector{String},String,Bool}} + if result isa Tuple{Vector{NamedCompletion},String,Bool} + return result + else + completions, partial, should_complete = result + return map(NamedCompletion, completions), partial, should_complete + end +end + function check_for_hint(s::MIState) st = state(s) if !options(st).hint_tab_completes || !eof(buffer(st)) @@ -383,12 +409,14 @@ function check_for_hint(s::MIState) return clear_hint(st) end - completions, partial, should_complete = try - complete_line(st.p.complete, st, s.active_module; hint = true)::Tuple{Vector{String},String,Bool} + named_completions, partial, should_complete = try + complete_line_named(st.p.complete, st, s.active_module; hint = true) catch @debug "error completing line for hint" exception=current_exceptions() return clear_hint(st) end + completions = map(x -> x.completion, named_completions) + isempty(completions) && return clear_hint(st) # Don't complete for single chars, given e.g. `x` completes to `xor` if length(partial) > 1 && should_complete @@ -425,7 +453,7 @@ function clear_hint(s::ModeState) end function complete_line(s::PromptState, repeats::Int, mod::Module; hint::Bool=false) - completions, partial, should_complete = complete_line(s.p.complete, s, mod; hint)::Tuple{Vector{String},String,Bool} + completions, partial, should_complete = complete_line_named(s.p.complete, s, mod; hint) isempty(completions) && return false if !should_complete # should_complete is false for cases where we only want to show @@ -435,7 +463,7 @@ function complete_line(s::PromptState, repeats::Int, mod::Module; hint::Bool=fal # Replace word by completion prev_pos = position(s) push_undo(s) - edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1]) + edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1].completion) else p = common_prefix(completions) if !isempty(p) && p != partial diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 50f610ff3b3e8..6c3f4bd4ba73a 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -843,7 +843,7 @@ function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module; h full = LineEdit.input_string(s) ret, range, should_complete = completions(full, lastindex(partial), mod, c.modifiers.shift, hint) c.modifiers = LineEdit.Modifiers() - return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end function complete_line(c::ShellCompletionProvider, s::PromptState; hint::Bool=false) @@ -851,14 +851,14 @@ function complete_line(c::ShellCompletionProvider, s::PromptState; hint::Bool=fa partial = beforecursor(s.input_buffer) full = LineEdit.input_string(s) ret, range, should_complete = shell_completions(full, lastindex(partial), hint) - return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end function complete_line(c::LatexCompletions, s; hint::Bool=false) partial = beforecursor(LineEdit.buffer(s)) full = LineEdit.input_string(s)::String ret, range, should_complete = bslash_completions(full, lastindex(partial), hint)[2] - return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end with_repl_linfo(f, repl) = f(outstream(repl)) diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 1f2c0cabbdb38..0bffb1a1015cd 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -2,7 +2,7 @@ module REPLCompletions -export completions, shell_completions, bslash_completions, completion_text +export completions, shell_completions, bslash_completions, completion_text, named_completion using Core: Const # We want to insulate the REPLCompletion module from any changes the user may @@ -13,6 +13,8 @@ using Base.Meta using Base: propertynames, something, IdSet using Base.Filesystem: _readdirx +using ..REPL.LineEdit: NamedCompletion + abstract type Completion end struct TextCompletion <: Completion @@ -57,8 +59,10 @@ struct MethodCompletion <: Completion end struct BslashCompletion <: Completion - bslash::String + completion::String # what is actually completed, for example "\trianglecdot" + name::String # what is displayed, for example "◬ \trianglecdot" end +BslashCompletion(completion::String) = BslashCompletion(completion, completion) struct ShellCompletion <: Completion text::String @@ -114,13 +118,21 @@ _completion_text(c::PackageCompletion) = c.package _completion_text(c::PropertyCompletion) = sprint(Base.show_sym, c.property) _completion_text(c::FieldCompletion) = sprint(Base.show_sym, c.field) _completion_text(c::MethodCompletion) = repr(c.method) -_completion_text(c::BslashCompletion) = c.bslash _completion_text(c::ShellCompletion) = c.text _completion_text(c::DictCompletion) = c.key _completion_text(c::KeywordArgumentCompletion) = c.kwarg*'=' completion_text(c) = _completion_text(c)::String +named_completion(c::BslashCompletion) = NamedCompletion(c.completion, c.name) + +function named_completion(c) + text = completion_text(c)::String + return NamedCompletion(text, text) +end + +named_completion_completion(c) = named_completion(c).completion::String + const Completions = Tuple{Vector{Completion}, UnitRange{Int}, Bool} function completes_global(x, name) @@ -984,12 +996,10 @@ function bslash_completions(string::String, pos::Int, hint::Bool=false) end # return possible matches; these cannot be mixed with regular # Julian completions as only latex / emoji symbols contain the leading \ - if startswith(s, "\\:") # emoji - namelist = Iterators.filter(k -> startswith(k, s), keys(emoji_symbols)) - else # latex - namelist = Iterators.filter(k -> startswith(k, s), keys(latex_symbols)) - end - return (true, (Completion[BslashCompletion(name) for name in sort!(collect(namelist))], slashpos:pos, true)) + symbol_dict = startswith(s, "\\:") ? emoji_symbols : latex_symbols + namelist = Iterators.filter(k -> startswith(k, s), keys(symbol_dict)) + completions = Completion[BslashCompletion(name, "$(symbol_dict[name]) $name") for name in sort!(collect(namelist))] + return (true, (completions, slashpos:pos, true)) end return (false, (Completion[], 0:-1, false)) end @@ -1099,7 +1109,7 @@ function complete_keyword_argument(partial::String, last_idx::Int, context_modul complete_keyval!(suggestions, last_word) end - return sort!(suggestions, by=completion_text), wordrange + return sort!(suggestions, by=named_completion_completion), wordrange end function get_loading_candidates(pkgstarts::String, project_file::String) @@ -1298,7 +1308,7 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif complete_identifiers!(suggestions, context_module, string, name, pos, separatorpos, startpos; shift) - return sort!(unique!(completion_text, suggestions), by=completion_text), (separatorpos+1):pos, true + return sort!(unique!(named_completion, suggestions), by=named_completion_completion), (separatorpos+1):pos, true elseif inc_tag === :cmd # TODO: should this call shell_completions instead of partially reimplementing it? let m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) # fuzzy shell_parse in reverse @@ -1496,7 +1506,7 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif complete_identifiers!(suggestions, context_module, string, name, pos, separatorpos, startpos; comp_keywords, complete_modules_only, shift) - return sort!(unique!(completion_text, suggestions), by=completion_text), namepos:pos, true + return sort!(unique!(named_completion, suggestions), by=named_completion_completion), namepos:pos, true end function shell_completions(string, pos, hint::Bool=false) diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index b259567884486..2c8d48cc232cf 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -170,17 +170,23 @@ end function map_completion_text(completions) c, r, res = completions - return map(completion_text, c), r, res + return map(x -> named_completion(x).completion, c), r, res +end + +function map_named_completion(completions) + c, r, res = completions + return map(named_completion, c), r, res end test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s)))) test_scomplete(s) = map_completion_text(@inferred(shell_completions(s, lastindex(s)))) -test_bslashcomplete(s) = map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2]) test_complete_context(s, m=@__MODULE__; shift::Bool=true) = map_completion_text(@inferred(completions(s,lastindex(s), m, shift))) test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo) test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false))) +test_bslashcomplete(s) = map_named_completion(@inferred(bslash_completions(s, lastindex(s)))[2]) + test_methods_list(@nospecialize(f), tt) = map(x -> string(x.method), Base._methods_by_ftype(Base.signature_type(f, tt), 10, Base.get_world_counter())) @@ -350,7 +356,8 @@ end # test latex symbol completions let s = "\\alpha" c, r = test_bslashcomplete(s) - @test c[1] == "α" + @test c[1].completion == "α" + @test c[1].name == "α" @test r == 1:lastindex(s) @test length(c) == 1 end @@ -358,7 +365,8 @@ end # test latex symbol completions after unicode #9209 let s = "α\\alpha" c, r = test_bslashcomplete(s) - @test c[1] == "α" + @test c[1].completion == "α" + @test c[1].name == "α" @test r == 3:sizeof(s) @test length(c) == 1 end @@ -366,20 +374,25 @@ end # test emoji symbol completions let s = "\\:koala:" c, r = test_bslashcomplete(s) - @test c[1] == "🐨" + @test c[1].completion == "🐨" + @test c[1].name == "🐨" @test r == 1:sizeof(s) @test length(c) == 1 end let s = "\\:ko" c, r = test_bslashcomplete(s) - @test "\\:koala:" in c + ko = only(filter(c) do namedcompletion + namedcompletion.completion == "\\:koala:" + end) + @test ko.name == "🐨 \\:koala:" end # test emoji symbol completions after unicode #9209 let s = "α\\:koala:" c, r = test_bslashcomplete(s) - @test c[1] == "🐨" + @test c[1].name == "🐨" + @test c[1].completion == "🐨" @test r == 3:sizeof(s) @test length(c) == 1 end @@ -1069,8 +1082,8 @@ let s, c, r # Issue #8047 s = "@show \"/dev/nul\"" c,r = completions(s, 15) - c = map(completion_text, c) - @test "null\"" in c + c = map(named_completion, c) + @test "null\"" in [_c.completion for _c in c] @test r == 13:15 @test s[r] == "nul" @@ -1476,7 +1489,7 @@ function test_dict_completion(dict_name) @test c == Any["\"abcd\"]"] s = "$dict_name[\"abcd]" # trailing close bracket c, r = completions(s, lastindex(s) - 1) - c = map(completion_text, c) + c = map(x -> named_completion(x).completion, c) @test c == Any["\"abcd\""] s = "$dict_name[:b" c, r = test_complete(s) diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 7c985828d47f2..464cb2ac9de9c 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -1965,8 +1965,9 @@ Arguments #self#::Core.Const(f) a::Int64 Body::UNION{FLOAT64, INT64} -1 ─ %1 = (a > 1)::Bool -└── goto #3 if not %1 +1 ─ %1 = :>::Core.Const(>) +│ %2 = (%1)(a, 1)::Bool +└── goto #3 if not %2 2 ─ return 1 3 ─ return 1.0 diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 2a2ec8e8e432c..4af4099eced45 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -2186,3 +2186,23 @@ end copyto!(A, 1, x, 1) @test A == axes(A,1) end + +@testset "reshape with Integer sizes" begin + @test reshape(1:4, big(2), big(2)) == reshape(1:4, 2, 2) + a = [1 2 3; 4 5 6] + reshaped_arrays = ( + reshape(a, 3, 2), + reshape(a, (3, 2)), + reshape(a, big(3), big(2)), + reshape(a, (big(3), big(2))), + reshape(a, :, big(2)), + reshape(a, (:, big(2))), + reshape(a, big(3), :), + reshape(a, (big(3), :)), + ) + @test allequal(reshaped_arrays) + for b ∈ reshaped_arrays + @test b isa Matrix{Int} + @test b.ref === a.ref + end +end diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index cc3f8950f0dc0..5df174694049d 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -783,6 +783,15 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` "Int(Base.JLOptions().fast_math)"`)) == JL_OPTIONS_FAST_MATH_DEFAULT end + let JL_OPTIONS_TASK_METRICS_OFF = 0, JL_OPTIONS_TASK_METRICS_ON = 1 + @test parse(Int,readchomp(`$exename -E + "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_OFF + @test parse(Int, readchomp(`$exename --task-metrics=yes -E + "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_ON + @test !parse(Bool, readchomp(`$exename -E "current_task().metrics_enabled"`)) + @test parse(Bool, readchomp(`$exename --task-metrics=yes -E "current_task().metrics_enabled"`)) + end + # --worker takes default / custom as argument (default/custom arguments # tested in test/parallel.jl) @test errors_not_signals(`$exename --worker=true`) diff --git a/test/core.jl b/test/core.jl index 39d02d5d567c9..7e4f655222ea5 100644 --- a/test/core.jl +++ b/test/core.jl @@ -25,6 +25,7 @@ for (T, c) in ( (TypeVar, [:name, :ub, :lb]), (Core.Memory, [:length, :ptr]), (Core.GenericMemoryRef, [:mem, :ptr_or_offset]), + (Task, [:metrics_enabled]), ) @test Set((fieldname(T, i) for i in 1:fieldcount(T) if isconst(T, i))) == Set(c) end @@ -42,7 +43,7 @@ for (T, c) in ( (DataType, [:types, :layout]), (Core.Memory, []), (Core.GenericMemoryRef, []), - (Task, [:_state]) + (Task, [:_state, :running_time_ns, :finished_at, :first_enqueued_at, :last_started_running_at]), ) @test Set((fieldname(T, i) for i in 1:fieldcount(T) if Base.isfieldatomic(T, i))) == Set(c) end @@ -7434,6 +7435,7 @@ end @test isa(Core.eval(@__MODULE__, :(Bar31062(()))), Bar31062) @test precompile(identity, (Foo31062,)) +using Core: SSAValue ftype_eval = Ref(0) FieldTypeA = String FieldTypeE = UInt32 @@ -7457,27 +7459,41 @@ let fc = FieldConvert(1.0, [2.0], 0x3, 0x4, 0x5) end @test ftype_eval[] == 1 let code = code_lowered(FieldConvert)[1].code - local fc_global_ssa, sp1_ssa, apply_type_ssa, field_type_ssa, - field_type2_ssa, field_type4_ssa, field_type5_ssa, - slot_read_1, slot_read_2, slot_read_3, slot_read_4, - new_ssa - @test code[(fc_global_ssa = 1;)] == GlobalRef(@__MODULE__, :FieldConvert) - @test code[(sp1_ssa = 2;)] == Expr(:static_parameter, 1) - @test code[(apply_type_ssa = 3;)] == Expr(:call, GlobalRef(Core, :apply_type), Core.SSAValue(fc_global_ssa), GlobalRef(@__MODULE__, :FieldTypeA), Core.SSAValue(sp1_ssa)) - @test code[(field_type_ssa = 4;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 1) - @test code[10] == Expr(:(=), Core.SlotNumber(10), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type_ssa), Core.SlotNumber(10))) - @test code[(slot_read_1 = 11;)] == Core.SlotNumber(10) - @test code[(field_type2_ssa = 12;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 2) - @test code[18] == Expr(:(=), Core.SlotNumber(9), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type2_ssa), Core.SlotNumber(9))) - @test code[(slot_read_2 = 19;)] == Core.SlotNumber(9) - @test code[(field_type4_ssa = 20;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 4) - @test code[26] == Expr(:(=), Core.SlotNumber(8), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type4_ssa), Core.SlotNumber(8))) - @test code[(slot_read_3 = 27;)] == Core.SlotNumber(8) - @test code[(field_type5_ssa = 28;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 5) - @test code[34] == Expr(:(=), Core.SlotNumber(7), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type5_ssa), Core.SlotNumber(7))) - @test code[(slot_read_4 = 35;)] == Core.SlotNumber(7) - @test code[(new_ssa = 36;)] == Expr(:new, Core.SSAValue(apply_type_ssa), Core.SSAValue(slot_read_1), Core.SSAValue(slot_read_2), Core.SlotNumber(4), Core.SSAValue(slot_read_3), Core.SSAValue(slot_read_4)) - @test code[37] == Core.ReturnNode(Core.SSAValue(new_ssa)) + calls = Vector{Pair{SSAValue, Expr}}(undef, 0) + for i = 1:length(code) + expr = code[i] + if Meta.isexpr(expr, :call) || (Meta.isexpr(expr, :(=)) && Meta.isexpr(expr.args[2], :call)) + push!(calls, SSAValue(i)=>expr) + end + end + + function is_globalref(arg, gr) + while isa(arg, SSAValue) + arg = code[arg.id] + end + arg == gr + end + + # calls[1] + @test all(is_globalref.(calls[1][2].args[1:3], (GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA)))) + + # calls[2] + @test all(is_globalref.(calls[2][2].args[1:1], (GlobalRef(Core, :fieldtype),))) + @test all(calls[2][2].args[2:3] .== (calls[1][1], 1)) + + # calls[3] - isa + + # calls[4] + let calle = calls[4][2] + @test Meta.isexpr(calle, :(=)) + call = calle.args[2] + @test is_globalref(call.args[1], GlobalRef(Base, :convert)) + @test call.args[2] == calls[2][1] + end + + # calls[5] + @test all(is_globalref.(calls[5][2].args[1:1], (GlobalRef(Core, :fieldtype),))) + @test all(calls[5][2].args[2:3] .== (calls[1][1], 2)) end # Issue #32820 @@ -8155,7 +8171,7 @@ end @test Core.Compiler.is_foldable(Base.infer_effects(length, (Core.SimpleVector,))) @test Core.Compiler.is_foldable(Base.infer_effects(getindex, (Core.SimpleVector,Int))) -# Test that a nothrow-globalref doesn't get outlined during lowering +# Test that a the lowering of nothrow globalref module WellKnownGlobal global well_known = 1 end @@ -8164,7 +8180,7 @@ macro insert_global() end check_globalref_lowering() = @insert_global let src = code_lowered(check_globalref_lowering)[1] - @test length(src.code) == 2 + @test length(src.code) == 4 end # Test correctness of widen_diagonal @@ -8353,9 +8369,23 @@ end @test eval(Expr(:toplevel, :(@define_call(f_macro_defined1)))) == 1 @test @define_call(f_macro_defined2) == 1 +# `invoke` of `Method` let m = which(+, (Int, Int)) @eval f56692(i) = invoke(+, $m, i, 4) global g56692() = f56692(5) == 9 ? "true" : false end @test @inferred(f56692(3)) == 7 @test @inferred(g56692()) == "true" + +# `invoke` of `CodeInstance` +f_invalidate_me() = return 1 +f_invoke_me() = return f_invalidate_me() +@test f_invoke_me() == 1 +const f_invoke_me_ci = Base.specialize_method(Base._which(Tuple{typeof(f_invoke_me)})).cache +f_call_me() = invoke(f_invoke_me, f_invoke_me_ci) +@test invoke(f_invoke_me, f_invoke_me_ci) == 1 +@test f_call_me() == 1 +@test_throws TypeError invoke(f_invoke_me, f_invoke_me_ci, 1) +f_invalidate_me() = 2 +@test_throws ErrorException invoke(f_invoke_me, f_invoke_me_ci) +@test_throws ErrorException f_call_me() diff --git a/test/offsetarray.jl b/test/offsetarray.jl index fb5855dfbaa0d..8e2ee33c49ed6 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -914,3 +914,14 @@ end b = sum(a, dims=1) @test b[begin] == sum(r) end + +@testset "reshape" begin + A0 = [1 3; 2 4] + A = reshape(A0, 2:3, 4:5) + @test axes(A) == Base.IdentityUnitRange.((2:3, 4:5)) + + B = reshape(A0, -10:-9, 9:10) + @test isa(B, OffsetArray{Int,2}) + @test parent(B) == A0 + @test axes(B) == Base.IdentityUnitRange.((-10:-9, 9:10)) +end diff --git a/test/staged.jl b/test/staged.jl index 1b28144639f97..6cb99950a7bb2 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -270,12 +270,12 @@ end # PR #23168 -function f23168(a, x) +@eval function f23168(a, x) push!(a, 1) if @generated - :(y = x + x) + :(y = $(+)(x, x)) else - y = 2x + y = $(*)(2, x) end push!(a, y) if @generated @@ -290,9 +290,9 @@ end let a = Any[] @test f23168(a, 3) == (6, Int) @test a == [1, 6, 3] - @test occursin(" + ", string(code_lowered(f23168, (Vector{Any},Int)))) - @test occursin("2 * ", string(Base.uncompressed_ir(first(methods(f23168))))) - @test occursin("2 * ", string(code_lowered(f23168, (Vector{Any},Int), generated=false))) + @test occursin("(+)(", string(code_lowered(f23168, (Vector{Any},Int)))) + @test occursin("(*)(2", string(Base.uncompressed_ir(first(methods(f23168))))) + @test occursin("(*)(2", string(code_lowered(f23168, (Vector{Any},Int), generated=false))) @test occursin("Base.add_int", string(code_typed(f23168, (Vector{Any},Int)))) end diff --git a/test/syntax.jl b/test/syntax.jl index d9d311ac6615d..315f2d8b0f38b 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3713,7 +3713,7 @@ end module Foreign54607 # Syntactic, not dynamic try_to_create_binding1() = (Foreign54607.foo = 2) - # GlobalRef is allowed for same-module assignment + # GlobalRef is allowed for same-module assignment and declares the binding @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo2)) = 2) function global_create_binding() global bar @@ -3728,7 +3728,7 @@ module Foreign54607 end @test_throws ErrorException (Foreign54607.foo = 1) @test_throws ErrorException Foreign54607.try_to_create_binding1() -@test_throws ErrorException Foreign54607.try_to_create_binding2() +Foreign54607.try_to_create_binding2() function assign_in_foreign_module() (Foreign54607.foo = 1) nothing @@ -3744,6 +3744,7 @@ Foreign54607.global_create_binding() @test isdefined(Foreign54607, :baz) @test isdefined(Foreign54607, :compiled_assign) @test isdefined(Foreign54607, :gr_assign) +@test isdefined(Foreign54607, :foo2) Foreign54607.bar = 8 @test Foreign54607.bar == 8 begin @@ -4033,3 +4034,11 @@ end @test isa(create_inner_f_no_methods(), Function) @test length(methods(create_inner_f_no_methods())) == 0 @test Base.invoke_in_world(first(methods(create_inner_f_one_method)).primary_world, create_inner_f_one_method()) == 1 + +# Issue 56711 - Scope of signature hoisting +function fs56711() + f(lhs::Integer) = 1 + f(lhs::Integer, rhs::(local x_should_not_be_defined=Integer; x_should_not_be_defined)) = 2 + return f +end +@test !@isdefined(x_should_not_be_defined) diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl index 17b2d8c28680a..e895372a34974 100644 --- a/test/testhelpers/OffsetArrays.jl +++ b/test/testhelpers/OffsetArrays.jl @@ -529,7 +529,7 @@ _similar_axes_or_length(AT, ax::I, ::I) where {I} = similar(AT, map(_indexlength # reshape accepts a single colon Base.reshape(A::AbstractArray, inds::OffsetAxis...) = reshape(A, inds) -function Base.reshape(A::AbstractArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) +function Base.reshape(A::AbstractArray, inds::Tuple{Vararg{OffsetAxis}}) AR = reshape(no_offset_view(A), map(_indexlength, inds)) O = OffsetArray(AR, map(_offset, axes(AR), inds)) return _popreshape(O, axes(AR), _filterreshapeinds(inds)) @@ -557,6 +557,8 @@ Base.reshape(A::OffsetArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) = OffsetArray(_reshape(parent(A), inds), map(_toaxis, inds)) # And for non-offset axes, we can just return a reshape of the parent directly Base.reshape(A::OffsetArray, inds::Tuple{Union{Integer,Base.OneTo},Vararg{Union{Integer,Base.OneTo}}}) = _reshape_nov(A, inds) +Base.reshape(A::OffsetArray, inds::Tuple{Integer,Vararg{Integer}}) = _reshape_nov(A, inds) +Base.reshape(A::OffsetArray, inds::Tuple{Union{Colon, Integer}, Vararg{Union{Colon, Integer}}}) = _reshape_nov(A, inds) Base.reshape(A::OffsetArray, inds::Dims) = _reshape_nov(A, inds) Base.reshape(A::OffsetVector, ::Colon) = A Base.reshape(A::OffsetVector, ::Tuple{Colon}) = A diff --git a/test/threads_exec.jl b/test/threads_exec.jl index ac54dd009390c..d77cf06905f44 100644 --- a/test/threads_exec.jl +++ b/test/threads_exec.jl @@ -3,6 +3,7 @@ using Test using Base.Threads using Base.Threads: SpinLock, threadpoolsize +using LinearAlgebra: peakflops # for cfunction_closure include("testenv.jl") @@ -1312,4 +1313,227 @@ end end end end + +@testset "Base.Experimental.task_metrics" begin + t = Task(() -> nothing) + @test_throws "const field" t.metrics_enabled = true + is_task_metrics_enabled() = fetch(Threads.@spawn current_task().metrics_enabled) + @test !is_task_metrics_enabled() + try + @testset "once" begin + Base.Experimental.task_metrics(true) + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) + @test !is_task_metrics_enabled() + end + @testset "multiple" begin + Base.Experimental.task_metrics(true) # 1 + Base.Experimental.task_metrics(true) # 2 + Base.Experimental.task_metrics(true) # 3 + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) # 2 + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) # 1 + @test is_task_metrics_enabled() + @sync for i in 1:5 # 0 (not negative) + Threads.@spawn Base.Experimental.task_metrics(false) + end + @test !is_task_metrics_enabled() + Base.Experimental.task_metrics(true) # 1 + @test is_task_metrics_enabled() + end + finally + while is_task_metrics_enabled() + Base.Experimental.task_metrics(false) + end + end +end + +@testset "task time counters" begin + @testset "enabled" begin + try + Base.Experimental.task_metrics(true) + start_time = time_ns() + t = Threads.@spawn peakflops() + wait(t) + end_time = time_ns() + wall_time_delta = end_time - start_time + @test t.metrics_enabled + @test Base.Experimental.task_running_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t) + @test wall_time_delta > Base.Experimental.task_wall_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end + @testset "disabled" begin + t = Threads.@spawn peakflops() + wait(t) + @test !t.metrics_enabled + @test isnothing(Base.Experimental.task_running_time_ns(t)) + @test isnothing(Base.Experimental.task_wall_time_ns(t)) + end + @testset "task not run" begin + t1 = Task(() -> nothing) + @test !t1.metrics_enabled + @test isnothing(Base.Experimental.task_running_time_ns(t1)) + @test isnothing(Base.Experimental.task_wall_time_ns(t1)) + try + Base.Experimental.task_metrics(true) + t2 = Task(() -> nothing) + @test t2.metrics_enabled + @test Base.Experimental.task_running_time_ns(t2) == 0 + @test Base.Experimental.task_wall_time_ns(t2) == 0 + finally + Base.Experimental.task_metrics(false) + end + end + @testset "task failure" begin + try + Base.Experimental.task_metrics(true) + t = Threads.@spawn error("this task failed") + @test_throws "this task failed" wait(t) + @test Base.Experimental.task_running_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end + @testset "direct yield(t)" begin + try + Base.Experimental.task_metrics(true) + start = time_ns() + t_outer = Threads.@spawn begin + t_inner = Task(() -> peakflops()) + t_inner.sticky = false + # directly yield to `t_inner` rather calling `schedule(t_inner)` + yield(t_inner) + wait(t_inner) + @test Base.Experimental.task_running_time_ns(t_inner) > 0 + @test Base.Experimental.task_wall_time_ns(t_inner) > 0 + @test Base.Experimental.task_wall_time_ns(t_inner) >= Base.Experimental.task_running_time_ns(t_inner) + end + wait(t_outer) + delta = time_ns() - start + @test Base.Experimental.task_running_time_ns(t_outer) > 0 + @test Base.Experimental.task_wall_time_ns(t_outer) > 0 + @test Base.Experimental.task_wall_time_ns(t_outer) >= Base.Experimental.task_running_time_ns(t_outer) + @test Base.Experimental.task_wall_time_ns(t_outer) < delta + finally + Base.Experimental.task_metrics(false) + end + end + @testset "bad schedule" begin + try + Base.Experimental.task_metrics(true) + t1 = Task((x) -> 1) + schedule(t1) # MethodError + yield() + @assert istaskfailed(t1) + @test Base.Experimental.task_running_time_ns(t1) > 0 + @test Base.Experimental.task_wall_time_ns(t1) > 0 + foo(a, b) = a + b + t2 = Task(() -> (peakflops(); foo(wait()))) + schedule(t2) + yield() + @assert istaskstarted(t1) && !istaskdone(t2) + schedule(t2, 1) + yield() + @assert istaskfailed(t2) + @test Base.Experimental.task_running_time_ns(t2) > 0 + @test Base.Experimental.task_wall_time_ns(t2) > 0 + finally + Base.Experimental.task_metrics(false) + end + end + @testset "continuously update until task done" begin + try + Base.Experimental.task_metrics(true) + last_running_time = Ref(typemax(Int)) + last_wall_time = Ref(typemax(Int)) + t = Threads.@spawn begin + running_time = Base.Experimental.task_running_time_ns() + wall_time = Base.Experimental.task_wall_time_ns() + for _ in 1:5 + x = time_ns() + while time_ns() < x + 100 + end + new_running_time = Base.Experimental.task_running_time_ns() + new_wall_time = Base.Experimental.task_wall_time_ns() + @test new_running_time > running_time + @test new_wall_time > wall_time + running_time = new_running_time + wall_time = new_wall_time + end + last_running_time[] = running_time + last_wall_time[] = wall_time + end + wait(t) + final_running_time = Base.Experimental.task_running_time_ns(t) + final_wall_time = Base.Experimental.task_wall_time_ns(t) + @test last_running_time[] < final_running_time + @test last_wall_time[] < final_wall_time + # ensure many more tasks are run to make sure the counters are + # not being updated after a task is done e.g. only when a new task is found + @sync for _ in 1:Threads.nthreads() + Threads.@spawn rand() + end + @test final_running_time == Base.Experimental.task_running_time_ns(t) + @test final_wall_time == Base.Experimental.task_wall_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end +end + +@testset "task time counters: lots of spawns" begin + using Dates + try + Base.Experimental.task_metrics(true) + # create more tasks than we have threads. + # - all tasks must have: cpu time <= wall time + # - some tasks must have: cpu time < wall time + # - summing across all tasks we must have: total cpu time <= available cpu time + n_tasks = 2 * Threads.nthreads(:default) + cpu_times = Vector{UInt64}(undef, n_tasks) + wall_times = Vector{UInt64}(undef, n_tasks) + start_time = time_ns() + @sync begin + for i in 1:n_tasks + start_time_i = time_ns() + task_i = Threads.@spawn peakflops() + Threads.@spawn begin + wait(task_i) + end_time_i = time_ns() + wall_time_delta_i = end_time_i - start_time_i + cpu_times[$i] = cpu_time_i = Base.Experimental.task_running_time_ns(task_i) + wall_times[$i] = wall_time_i = Base.Experimental.task_wall_time_ns(task_i) + # task should have recorded some cpu-time and some wall-time + @test cpu_time_i > 0 + @test wall_time_i > 0 + # task cpu-time cannot be greater than its wall-time + @test wall_time_i >= cpu_time_i + # task wall-time must be less than our manually measured wall-time + # between calling `@spawn` and returning from `wait`. + @test wall_time_delta_i > wall_time_i + end + end + end + end_time = time_ns() + wall_time_delta = (end_time - start_time) + available_cpu_time = wall_time_delta * Threads.nthreads(:default) + summed_cpu_time = sum(cpu_times) + # total CPU time from all tasks can't exceed what was actually available. + @test available_cpu_time > summed_cpu_time + # some tasks must have cpu-time less than their wall-time, because we had more tasks + # than threads. + summed_wall_time = sum(wall_times) + @test summed_wall_time > summed_cpu_time + finally + Base.Experimental.task_metrics(false) + end +end + end # main testset