From 1f935afc07edde9f8c2e1a0f05d4772e18a55e97 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 17 Oct 2024 15:50:28 -0400 Subject: [PATCH 01/47] [REPL] fix lock ordering mistake in load_pkg (#56215) Fixes #56206 --- base/loading.jl | 1 + stdlib/REPL/src/Pkg_beforeload.jl | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index b396c7897c1fd..49d0cb52cd37b 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2093,6 +2093,7 @@ debug_loading_deadlocks::Bool = true # Enable a slightly more expensive, but mor function start_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool) # handle recursive and concurrent calls to require assert_havelock(require_lock) + require_lock.reentrancy_cnt == 1 || throw(ConcurrencyViolationError("recursive call to start_loading")) while true loaded = stalecheck ? maybe_root_module(modkey) : nothing loaded isa Module && return loaded diff --git a/stdlib/REPL/src/Pkg_beforeload.jl b/stdlib/REPL/src/Pkg_beforeload.jl index 472fbc924668d..e110910bafc2f 100644 --- a/stdlib/REPL/src/Pkg_beforeload.jl +++ b/stdlib/REPL/src/Pkg_beforeload.jl @@ -1,17 +1,16 @@ ## Pkg stuff needed before Pkg has loaded const Pkg_pkgid = Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg") -const Pkg_REPLExt_pkgid = Base.PkgId(Base.UUID("ceef7b17-42e7-5b1c-81d4-4cc4a2494ccf"), "REPLExt") function load_pkg() + REPLExt = Base.require_stdlib(Pkg_pkgid, "REPLExt") @lock Base.require_lock begin - REPLExt = Base.require_stdlib(Pkg_pkgid, "REPLExt") # require_stdlib does not guarantee that the `__init__` of the package is done when loading is done async # but we need to wait for the repl mode to be set up - lock = get(Base.package_locks, Pkg_REPLExt_pkgid.uuid, nothing) + lock = get(Base.package_locks, Base.PkgId(REPLExt), nothing) lock !== nothing && wait(lock[2]) - return REPLExt end + return REPLExt end ## Below here copied/tweaked from Pkg Types.jl so that the dummy Pkg prompt From 1cf384222a8b70a1f9cafbefd56cc0940fbb66c1 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 17 Oct 2024 17:16:25 -0400 Subject: [PATCH 02/47] REPL: fix unsafe_write return type (#56220) Fixes: #56219 I am not really sure why we have a test for this, but we need to make the test happy --- stdlib/REPL/src/REPL.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 88458f7de4666..b8f850c3e9ff9 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -525,7 +525,7 @@ function Base.unsafe_write(limiter::LimitIO, p::Ptr{UInt8}, nb::UInt) end # We won't hit the limit so we'll write the full `nb` bytes - bytes_written = Base.unsafe_write(limiter.io, p, nb) + bytes_written = Base.unsafe_write(limiter.io, p, nb)::Union{Int,UInt} limiter.n += bytes_written return bytes_written end From f1990e2e3d8d31087b23288f640ba16909f7ec7b Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Fri, 18 Oct 2024 02:49:19 +0530 Subject: [PATCH 03/47] Fix triu/tril for partly initialized matrices (#55312) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes ```julia julia> using LinearAlgebra, StaticArrays julia> M = Matrix{BigInt}(undef, 2, 2); M[1,1] = M[2,2] = M[1,2] = 3; julia> S = SizedMatrix{2,2}(M) 2×2 SizedMatrix{2, 2, BigInt, 2, Matrix{BigInt}} with indices SOneTo(2)×SOneTo(2): 3 3 #undef 3 julia> triu(S) ERROR: UndefRefError: access to undefined reference Stacktrace: [1] getindex @ ./essentials.jl:907 [inlined] [2] getindex @ ~/.julia/packages/StaticArrays/MSJcA/src/SizedArray.jl:92 [inlined] [3] copyto_unaliased! @ ./abstractarray.jl:1086 [inlined] [4] copyto!(dest::SizedMatrix{2, 2, BigInt, 2, Matrix{BigInt}}, src::SizedMatrix{2, 2, BigInt, 2, Matrix{BigInt}}) @ Base ./abstractarray.jl:1066 [5] copymutable @ ./abstractarray.jl:1200 [inlined] [6] triu(M::SizedMatrix{2, 2, BigInt, 2, Matrix{BigInt}}) @ LinearAlgebra ~/.julia/juliaup/julia-nightly/share/julia/stdlib/v1.12/LinearAlgebra/src/generic.jl:413 [7] top-level scope @ REPL[11]:1 ``` After this PR: ```julia julia> triu(S) 2×2 SizedMatrix{2, 2, BigInt, 2, Matrix{BigInt}} with indices SOneTo(2)×SOneTo(2): 3 3 0 3 ``` Only the indices that need to be copied are accessed, and the others are written to without being read. --- stdlib/LinearAlgebra/src/generic.jl | 80 ++++++++++------------------ stdlib/LinearAlgebra/test/generic.jl | 55 +++++++++++++++++++ 2 files changed, 83 insertions(+), 52 deletions(-) diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl index e5f23b4981616..6c65c49add74b 100644 --- a/stdlib/LinearAlgebra/src/generic.jl +++ b/stdlib/LinearAlgebra/src/generic.jl @@ -389,55 +389,7 @@ function cross(a::AbstractVector, b::AbstractVector) end """ - triu(M) - -Upper triangle of a matrix. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> triu(a) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 0.0 1.0 1.0 1.0 - 0.0 0.0 1.0 1.0 - 0.0 0.0 0.0 1.0 -``` -""" -triu(M::AbstractMatrix) = triu!(copymutable(M)) - -""" - tril(M) - -Lower triangle of a matrix. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> tril(a) -4×4 Matrix{Float64}: - 1.0 0.0 0.0 0.0 - 1.0 1.0 0.0 0.0 - 1.0 1.0 1.0 0.0 - 1.0 1.0 1.0 1.0 -``` -""" -tril(M::AbstractMatrix) = tril!(copymutable(M)) - -""" - triu(M, k::Integer) + triu(M, k::Integer = 0) Return the upper triangle of `M` starting from the `k`th superdiagonal. @@ -465,10 +417,22 @@ julia> triu(a,-3) 1.0 1.0 1.0 1.0 ``` """ -triu(M::AbstractMatrix,k::Integer) = triu!(copymutable(M),k) +function triu(M::AbstractMatrix, k::Integer = 0) + d = similar(M) + A = triu!(d,k) + if iszero(k) + copytrito!(A, M, 'U') + else + for col in axes(A,2) + rows = firstindex(A,1):min(col-k, lastindex(A,1)) + A[rows, col] = @view M[rows, col] + end + end + return A +end """ - tril(M, k::Integer) + tril(M, k::Integer = 0) Return the lower triangle of `M` starting from the `k`th superdiagonal. @@ -496,7 +460,19 @@ julia> tril(a,-3) 1.0 0.0 0.0 0.0 ``` """ -tril(M::AbstractMatrix,k::Integer) = tril!(copymutable(M),k) +function tril(M::AbstractMatrix,k::Integer=0) + d = similar(M) + A = tril!(d,k) + if iszero(k) + copytrito!(A, M, 'L') + else + for col in axes(A,2) + rows = max(firstindex(A,1),col-k):lastindex(A,1) + A[rows, col] = @view M[rows, col] + end + end + return A +end """ triu!(M) diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl index e0a1704913f78..2bf9c75141700 100644 --- a/stdlib/LinearAlgebra/test/generic.jl +++ b/stdlib/LinearAlgebra/test/generic.jl @@ -18,6 +18,9 @@ using .Main.DualNumbers isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) using .Main.FillArrays +isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl")) +using .Main.SizedArrays + Random.seed!(123) n = 5 # should be odd @@ -725,4 +728,56 @@ end @test det(A) == det(M) end +@testset "tril/triu" begin + @testset "with partly initialized matrices" begin + function test_triu(M, k=nothing) + M[1,1] = M[2,2] = M[1,2] = M[1,3] = M[2,3] = 3 + if isnothing(k) + MU = triu(M) + else + MU = triu(M, k) + end + @test iszero(MU[2,1]) + @test MU[1,1] == MU[2,2] == MU[1,2] == MU[1,3] == MU[2,3] == 3 + end + test_triu(Matrix{BigInt}(undef, 2, 3)) + test_triu(Matrix{BigInt}(undef, 2, 3), 0) + test_triu(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3))) + test_triu(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3)), 0) + + function test_tril(M, k=nothing) + M[1,1] = M[2,2] = M[2,1] = 3 + if isnothing(k) + ML = tril(M) + else + ML = tril(M, k) + end + @test ML[1,2] == ML[1,3] == ML[2,3] == 0 + @test ML[1,1] == ML[2,2] == ML[2,1] == 3 + end + test_tril(Matrix{BigInt}(undef, 2, 3)) + test_tril(Matrix{BigInt}(undef, 2, 3), 0) + test_tril(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3))) + test_tril(SizedArrays.SizedArray{(2,3)}(Matrix{BigInt}(undef, 2, 3)), 0) + end + + @testset "block arrays" begin + for nrows in 0:3, ncols in 0:3 + M = [randn(2,2) for _ in 1:nrows, _ in 1:ncols] + Mu = triu(M) + for col in axes(M,2) + rowcutoff = min(col, size(M,1)) + @test @views Mu[1:rowcutoff, col] == M[1:rowcutoff, col] + @test @views Mu[rowcutoff+1:end, col] == zero.(M[rowcutoff+1:end, col]) + end + Ml = tril(M) + for col in axes(M,2) + @test @views Ml[col:end, col] == M[col:end, col] + rowcutoff = min(col-1, size(M,1)) + @test @views Ml[1:rowcutoff, col] == zero.(M[1:rowcutoff, col]) + end + end + end +end + end # module TestGeneric From e33c6a8551e070e7936a2ac95180a6c834f56549 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Fri, 18 Oct 2024 17:04:44 +0530 Subject: [PATCH 04/47] Specialize adding/subtracting mixed Upper/LowerTriangular (#56149) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes https://github.com/JuliaLang/julia/issues/56134 After this, ```julia julia> using LinearAlgebra julia> A = hermitianpart(rand(4, 4)) 4×4 Hermitian{Float64, Matrix{Float64}}: 0.387617 0.277226 0.67629 0.60678 0.277226 0.894101 0.388416 0.489141 0.67629 0.388416 0.100907 0.619955 0.60678 0.489141 0.619955 0.452605 julia> B = UpperTriangular(A) 4×4 UpperTriangular{Float64, Hermitian{Float64, Matrix{Float64}}}: 0.387617 0.277226 0.67629 0.60678 ⋅ 0.894101 0.388416 0.489141 ⋅ ⋅ 0.100907 0.619955 ⋅ ⋅ ⋅ 0.452605 julia> B - B' 4×4 Matrix{Float64}: 0.0 0.277226 0.67629 0.60678 -0.277226 0.0 0.388416 0.489141 -0.67629 -0.388416 0.0 0.619955 -0.60678 -0.489141 -0.619955 0.0 ``` This preserves the band structure of the parent, if any: ```julia julia> U = UpperTriangular(Diagonal(ones(4))) 4×4 UpperTriangular{Float64, Diagonal{Float64, Vector{Float64}}}: 1.0 0.0 0.0 0.0 ⋅ 1.0 0.0 0.0 ⋅ ⋅ 1.0 0.0 ⋅ ⋅ ⋅ 1.0 julia> U - U' 4×4 Diagonal{Float64, Vector{Float64}}: 0.0 ⋅ ⋅ ⋅ ⋅ 0.0 ⋅ ⋅ ⋅ ⋅ 0.0 ⋅ ⋅ ⋅ ⋅ 0.0 ``` This doesn't fully work with partly initialized matrices, and would need https://github.com/JuliaLang/julia/pull/55312 for that. The abstract triangular methods now construct matrices using `similar(parent(U), size(U))` so that the destinations are fully mutable. ```julia julia> @invoke B::LinearAlgebra.AbstractTriangular - B'::LinearAlgebra.AbstractTriangular 4×4 Matrix{Float64}: 0.0 0.277226 0.67629 0.60678 -0.277226 0.0 0.388416 0.489141 -0.67629 -0.388416 0.0 0.619955 -0.60678 -0.489141 -0.619955 0.0 ``` --------- Co-authored-by: Daniel Karrasch --- stdlib/LinearAlgebra/src/triangular.jl | 19 +++++++++-- stdlib/LinearAlgebra/test/triangular.jl | 43 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl index 71660bc5ca28c..83ef221329d33 100644 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ b/stdlib/LinearAlgebra/src/triangular.jl @@ -142,6 +142,7 @@ UnitUpperTriangular const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTriangular{T,S}} const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}} const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}} +const UnitUpperOrUnitLowerTriangular{T,S} = Union{UnitUpperTriangular{T,S}, UnitLowerTriangular{T,S}} uppertriangular(M) = UpperTriangular(M) lowertriangular(M) = LowerTriangular(M) @@ -181,6 +182,16 @@ copy(A::UpperOrLowerTriangular{<:Any, <:StridedMaybeAdjOrTransMat}) = copyto!(si # then handle all methods that requires specific handling of upper/lower and unit diagonal +function full(A::Union{UpperTriangular,LowerTriangular}) + return _triangularize(A)(parent(A)) +end +function full(A::UnitUpperOrUnitLowerTriangular) + isupper = A isa UnitUpperTriangular + Ap = _triangularize(A)(parent(A), isupper ? 1 : -1) + Ap[diagind(Ap, IndexStyle(Ap))] = @view A[diagind(A, IndexStyle(A))] + return Ap +end + function full!(A::LowerTriangular) B = A.data tril!(B) @@ -571,6 +582,8 @@ end return A end +_triangularize(::UpperOrUnitUpperTriangular) = triu +_triangularize(::LowerOrUnitLowerTriangular) = tril _triangularize!(::UpperOrUnitUpperTriangular) = triu! _triangularize!(::LowerOrUnitLowerTriangular) = tril! @@ -880,7 +893,8 @@ function +(A::UnitLowerTriangular, B::UnitLowerTriangular) (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I) end -+(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) + copyto!(similar(parent(B)), B) ++(A::UpperOrLowerTriangular, B::UpperOrLowerTriangular) = full(A) + full(B) ++(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A), size(A)), A) + copyto!(similar(parent(B), size(B)), B) function -(A::UpperTriangular, B::UpperTriangular) (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B @@ -914,7 +928,8 @@ function -(A::UnitLowerTriangular, B::UnitLowerTriangular) (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B LowerTriangular(tril(A.data, -1) - tril(B.data, -1)) end --(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B) +-(A::UpperOrLowerTriangular, B::UpperOrLowerTriangular) = full(A) - full(B) +-(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A), size(A)), A) - copyto!(similar(parent(B), size(B)), B) function kron(A::UpperTriangular{T,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{S,<:StridedMaybeAdjOrTransMat}) where {T,S} C = UpperTriangular(Matrix{promote_op(*, T, S)}(undef, _kronsize(A, B))) diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl index ec9a3079e2643..7acb3cbfc0c57 100644 --- a/stdlib/LinearAlgebra/test/triangular.jl +++ b/stdlib/LinearAlgebra/test/triangular.jl @@ -1322,4 +1322,47 @@ end end end +@testset "addition/subtraction of mixed triangular" begin + for A in (Hermitian(rand(4, 4)), Diagonal(rand(5))) + for T in (UpperTriangular, LowerTriangular, + UnitUpperTriangular, UnitLowerTriangular) + B = T(A) + M = Matrix(B) + R = B - B' + if A isa Diagonal + @test R isa Diagonal + end + @test R == M - M' + R = B + B' + if A isa Diagonal + @test R isa Diagonal + end + @test R == M + M' + C = MyTriangular(B) + @test C - C' == M - M' + @test C + C' == M + M' + end + end + @testset "unfilled parent" begin + @testset for T in (UpperTriangular, LowerTriangular, + UnitUpperTriangular, UnitLowerTriangular) + F = Matrix{BigFloat}(undef, 2, 2) + B = T(F) + isupper = B isa Union{UpperTriangular, UnitUpperTriangular} + B[1+!isupper, 1+isupper] = 2 + if !(B isa Union{UnitUpperTriangular, UnitLowerTriangular}) + B[1,1] = B[2,2] = 3 + end + M = Matrix(B) + @test B - B' == M - M' + @test B + B' == M + M' + @test B - copy(B') == M - M' + @test B + copy(B') == M + M' + C = MyTriangular(B) + @test C - C' == M - M' + @test C + C' == M + M' + end + end +end + end # module TestTriangular From 6317e02035e250ad071275da1caa3240ca6a7390 Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Fri, 18 Oct 2024 16:20:25 +0200 Subject: [PATCH 05/47] juliac: remove call to jl_set_newly_inferred (#56222) Moved in #56186 --- contrib/juliac-buildscript.jl | 1 - src/precompile_utils.c | 10 ++++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/contrib/juliac-buildscript.jl b/contrib/juliac-buildscript.jl index 50f96198c416b..490bca86e1cba 100644 --- a/contrib/juliac-buildscript.jl +++ b/contrib/juliac-buildscript.jl @@ -17,7 +17,6 @@ task.rngState3 = 0x3a77f7189200c20b task.rngState4 = 0x5502376d099035ae uuid_tuple = (UInt64(0), UInt64(0)) ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple) -ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) # Patch methods in Core and Base diff --git a/src/precompile_utils.c b/src/precompile_utils.c index a78d1e66dbb51..fc361d8b88e6f 100644 --- a/src/precompile_utils.c +++ b/src/precompile_utils.c @@ -312,10 +312,12 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met } } } - n = jl_array_nrows(new_ext_cis); - for (i = 0; i < n; i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i); - precompile_enq_specialization_(ci->def, m); + if (new_ext_cis) { + n = jl_array_nrows(new_ext_cis); + for (i = 0; i < n; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i); + precompile_enq_specialization_(ci->def, m); + } } void *native_code = jl_precompile_(m, 1); JL_GC_POP(); From a64ffa308f04a5bb35dda785caaff85d52296bad Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:24:36 -0400 Subject: [PATCH 06/47] Fix `goto` insertion when dom-sorting IR in `slot2ssa` pass (#56189) Fix-up this pass a bit to correctly handle fall-through terminators that cannot have their BasicBlock extended (e.g. `Expr(:leave, ...)`) --- base/compiler/ssair/slot2ssa.jl | 75 ++++++++++++++++++++------------- test/compiler/irpasses.jl | 29 +++++++++++++ 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index e70633ffecf6a..2eacdf0f56cfe 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -339,43 +339,58 @@ RPO traversal and in particular, any use of an SSA value must come after (by linear order) its definition. """ function domsort_ssa!(ir::IRCode, domtree::DomTree) - # First compute the new order of basic blocks + # Mapping from new → old BB index + # An "old" index of 0 means that this was a BB inserted as part of a fixup (see below) result_order = Int[] - stack = Int[] + + # Mapping from old → new BB index bb_rename = fill(-1, length(ir.cfg.blocks)) - node = 1 - ncritbreaks = 0 - nnewfallthroughs = 0 - while node !== -1 - push!(result_order, node) - bb_rename[node] = length(result_order) - cs = domtree.nodes[node].children - terminator = ir[SSAValue(last(ir.cfg.blocks[node].stmts))][:stmt] - next_node = node + 1 - node = -1 + + # The number of GotoNodes we need to insert to preserve control-flow after sorting + nfixupstmts = 0 + + # node queued up for scheduling (-1 === nothing) + node_to_schedule = 1 + worklist = Int[] + while node_to_schedule !== -1 + # First assign a new BB index to `node_to_schedule` + push!(result_order, node_to_schedule) + bb_rename[node_to_schedule] = length(result_order) + cs = domtree.nodes[node_to_schedule].children + terminator = ir[SSAValue(last(ir.cfg.blocks[node_to_schedule].stmts))][:stmt] + fallthrough = node_to_schedule + 1 + node_to_schedule = -1 + # Adding the nodes in reverse sorted order attempts to retain # the original source order of the nodes as much as possible. # This is not required for correctness, but is easier on the humans - for child in Iterators.Reverse(cs) - if child == next_node + for node in Iterators.Reverse(cs) + if node == fallthrough # Schedule the fall through node first, # so we can retain the fall through - node = next_node + node_to_schedule = node else - push!(stack, child) + push!(worklist, node) end end - if node == -1 && !isempty(stack) - node = pop!(stack) + if node_to_schedule == -1 && !isempty(worklist) + node_to_schedule = pop!(worklist) end - if node != next_node && !isa(terminator, Union{GotoNode, ReturnNode}) + # If a fallthrough successor is no longer the fallthrough after sorting, we need to + # add a GotoNode (and either extend or split the basic block as necessary) + if node_to_schedule != fallthrough && !isa(terminator, Union{GotoNode, ReturnNode}) if isa(terminator, GotoIfNot) # Need to break the critical edge - ncritbreaks += 1 + push!(result_order, 0) + elseif isa(terminator, EnterNode) || isexpr(terminator, :leave) + # Cannot extend the BasicBlock with a goto, have to split it push!(result_order, 0) else - nnewfallthroughs += 1 + # No need for a new block, just extend + @assert !isterminator(terminator) end + # Reserve space for the fixup goto + nfixupstmts += 1 end end new_bbs = Vector{BasicBlock}(undef, length(result_order)) @@ -385,7 +400,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) nstmts += length(ir.cfg.blocks[i].stmts) end end - result = InstructionStream(nstmts + ncritbreaks + nnewfallthroughs) + result = InstructionStream(nstmts + nfixupstmts) inst_rename = Vector{SSAValue}(undef, length(ir.stmts) + length(ir.new_nodes)) @inbounds for i = 1:length(ir.stmts) inst_rename[i] = SSAValue(-1) @@ -394,7 +409,6 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) inst_rename[i + length(ir.stmts)] = SSAValue(i + length(result)) end bb_start_off = 0 - crit_edge_breaks_fixup = Tuple{Int, Int}[] for (new_bb, bb) in pairs(result_order) if bb == 0 nidx = bb_start_off + 1 @@ -426,8 +440,8 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) else result[inst_range[end]][:stmt] = GotoNode(bb_rename[terminator.label]) end - elseif isa(terminator, GotoIfNot) - # Check if we need to break the critical edge + elseif isa(terminator, GotoIfNot) || isa(terminator, EnterNode) || isexpr(terminator, :leave) + # Check if we need to break the critical edge or split the block if bb_rename[bb + 1] != new_bb + 1 @assert result_order[new_bb + 1] == 0 # Add an explicit goto node in the next basic block (we accounted for this above) @@ -435,11 +449,14 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) node = result[nidx] node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate end - result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) - elseif !isa(terminator, ReturnNode) - if isa(terminator, EnterNode) + if isa(terminator, GotoIfNot) + result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) + elseif isa(terminator, EnterNode) result[inst_range[end]][:stmt] = EnterNode(terminator, terminator.catch_dest == 0 ? 0 : bb_rename[terminator.catch_dest]) + else + @assert isexpr(terminator, :leave) end + elseif !isa(terminator, ReturnNode) if bb_rename[bb + 1] != new_bb + 1 # Add an explicit goto node nidx = inst_range[end] + 1 @@ -452,7 +469,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) local new_preds, new_succs let bb = bb, bb_rename = bb_rename, result_order = result_order new_preds = Int[bb for bb in (rename_incoming_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].preds) if bb != -1] - new_succs = Int[ rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs] + new_succs = Int[ rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs] end new_bbs[new_bb] = BasicBlock(inst_range, new_preds, new_succs) end diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 740ac5f4958e4..13ef05db2f23a 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -1967,3 +1967,32 @@ let f = (x)->nothing, mi = Base.method_instance(f, (Base.RefValue{Nothing},)), c ir = Core.Compiler.sroa_pass!(ir, inlining) Core.Compiler.verify_ir(ir) end + +let code = Any[ + # block 1 + GotoNode(4), # skip + # block 2 + Expr(:leave, SSAValue(1)), # not domsorted - make sure we move it correctly + # block 3 + ReturnNode(2), + # block 4 + EnterNode(7), + # block 5 + GotoIfNot(Argument(1), 2), + # block 6 + Expr(:leave, SSAValue(1)), + # block 7 + ReturnNode(1), + # block 8 + ReturnNode(nothing), + ] + ir = make_ircode(code; ssavaluetypes=Any[Any, Any, Union{}, Any, Any, Any, Union{}, Union{}]) + @test length(ir.cfg.blocks) == 8 + Core.Compiler.verify_ir(ir) + + # The IR should remain valid after domsorting + # (esp. including the insertion of new BasicBlocks for any fix-ups) + domtree = Core.Compiler.construct_domtree(ir) + ir = Core.Compiler.domsort_ssa!(ir, domtree) + Core.Compiler.verify_ir(ir) +end From ca3713e7ac8489acd1afe0a47dc8ceeaafb4a292 Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Fri, 18 Oct 2024 16:29:42 +0200 Subject: [PATCH 07/47] fix infinite recursion in `promote_type` for `Irrational` (#55870) Fixes #51001 --- base/irrationals.jl | 11 ++++++++++- test/numbers.jl | 8 ++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/base/irrationals.jl b/base/irrationals.jl index c51b66045723f..76222997865c0 100644 --- a/base/irrationals.jl +++ b/base/irrationals.jl @@ -45,7 +45,16 @@ promote_rule(::Type{<:AbstractIrrational}, ::Type{Float16}) = Float16 promote_rule(::Type{<:AbstractIrrational}, ::Type{Float32}) = Float32 promote_rule(::Type{<:AbstractIrrational}, ::Type{<:AbstractIrrational}) = Float64 promote_rule(::Type{<:AbstractIrrational}, ::Type{T}) where {T<:Real} = promote_type(Float64, T) -promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number} = promote_type(promote_type(S, real(T)), T) + +function promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number} + U = promote_type(S, real(T)) + if S <: U + # prevent infinite recursion + promote_type(Float64, T) + else + promote_type(U, T) + end +end AbstractFloat(x::AbstractIrrational) = Float64(x)::Float64 Float16(x::AbstractIrrational) = Float16(Float32(x)::Float32) diff --git a/test/numbers.jl b/test/numbers.jl index fc3dc2c06bb7c..dc4f2cb613d77 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -2937,6 +2937,14 @@ end @test log(π,ComplexF32(2)) isa ComplexF32 end +@testset "irrational promotion shouldn't recurse without bound, issue #51001" begin + for s ∈ (:π, :ℯ) + T = Irrational{s} + @test promote_type(Complex{T}, T) <: Complex + @test promote_type(T, Complex{T}) <: Complex + end +end + @testset "printing non finite floats" begin let float_types = Set() allsubtypes!(Base, AbstractFloat, float_types) From e5aff12f63f7a2141f1f4a3eb69ff049618d6fbc Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 18 Oct 2024 12:01:45 -0300 Subject: [PATCH 08/47] codegen: replace store of freeze in allocop and in emit new struct with memset since aggregate stores are bad (#55879) This fixes the issues found in slack in the reinterprets of ```julia julia> split128_v2(x::UInt128) = (first(reinterpret(NTuple{2, UInt}, x)), last(reinterpret(NTuple{2, UInt}, x))) split128_v2 (generic function with 1 method) julia> split128(x::UInt128) = reinterpret(NTuple{2, UInt}, x) split128 (generic function with 1 method) @code_native split128(UInt128(5)) push rbp mov rbp, rsp mov rax, rdi mov qword ptr [rdi + 8], rdx mov qword ptr [rdi], rsi pop rbp ret @code_native split128_v2(UInt128(5)) push rbp mov rbp, rsp mov rax, rdi mov qword ptr [rdi], rsi mov qword ptr [rdi + 8], rdx pop rbp ret ``` vs on master where ```julia julia> @code_native split128(UInt128(5)) push rbp mov rbp, rsp mov eax, esi shr eax, 8 mov ecx, esi shr ecx, 16 mov r8, rsi mov r9, rsi vmovd xmm0, esi vpinsrb xmm0, xmm0, eax, 1 mov rax, rsi vpinsrb xmm0, xmm0, ecx, 2 mov rcx, rsi shr esi, 24 vpinsrb xmm0, xmm0, esi, 3 shr r8, 32 vpinsrb xmm0, xmm0, r8d, 4 shr r9, 40 vpinsrb xmm0, xmm0, r9d, 5 shr rax, 48 vpinsrb xmm0, xmm0, eax, 6 shr rcx, 56 vpinsrb xmm0, xmm0, ecx, 7 vpinsrb xmm0, xmm0, edx, 8 mov eax, edx shr eax, 8 vpinsrb xmm0, xmm0, eax, 9 mov eax, edx shr eax, 16 vpinsrb xmm0, xmm0, eax, 10 mov eax, edx shr eax, 24 vpinsrb xmm0, xmm0, eax, 11 mov rax, rdx shr rax, 32 vpinsrb xmm0, xmm0, eax, 12 mov rax, rdx shr rax, 40 vpinsrb xmm0, xmm0, eax, 13 mov rax, rdx shr rax, 48 vpinsrb xmm0, xmm0, eax, 14 mov rax, rdi shr rdx, 56 vpinsrb xmm0, xmm0, edx, 15 vmovdqu xmmword ptr [rdi], xmm0 pop rbp ret ``` --- src/cgutils.cpp | 31 ++++++++++++------------------- src/llvm-alloc-opt.cpp | 11 +++-------- test/llvmpasses/alloc-opt-pass.ll | 11 ++++------- 3 files changed, 19 insertions(+), 34 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 4547e693755cd..a166b0a2c4800 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -4213,7 +4213,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg else { strct = UndefValue::get(lt); if (nargs < nf) - strct = ctx.builder.CreateFreeze(strct); + strct = ctx.builder.CreateFreeze(strct); // Change this to zero initialize instead? } } else if (tracked.second) { @@ -4380,25 +4380,18 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg ctx.builder.restoreIP(savedIP); } } - for (size_t i = nargs; i < nf; i++) { - if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { - ssize_t offs = jl_field_offset(sty, i); - ssize_t ptrsoffs = -1; - if (!inline_roots.empty()) - std::tie(offs, ptrsoffs) = split_value_field(sty, i); - assert(ptrsoffs < 0 && offs >= 0); - int fsz = jl_field_size(sty, i) - 1; - if (init_as_value) { + if (init_as_value) { + for (size_t i = nargs; i < nf; i++) { + if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { + ssize_t offs = jl_field_offset(sty, i); + ssize_t ptrsoffs = -1; + if (!inline_roots.empty()) + std::tie(offs, ptrsoffs) = split_value_field(sty, i); + assert(ptrsoffs < 0 && offs >= 0); + int fsz = jl_field_size(sty, i) - 1; unsigned llvm_idx = convert_struct_offset(ctx, cast(lt), offs + fsz); strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef(llvm_idx)); } - else { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); - Instruction *dest = cast(emit_ptrgep(ctx, strct, offs + fsz)); - if (promotion_point == nullptr) - promotion_point = dest; - ai.decorateInst(ctx.builder.CreateAlignedStore(ctx.builder.getInt8(0), dest, Align(1))); - } } } if (nargs < nf) { @@ -4407,9 +4400,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (promotion_point) ctx.builder.SetInsertPoint(promotion_point); if (strct) { - promotion_point = cast(ctx.builder.CreateFreeze(UndefValue::get(lt))); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); - ai.decorateInst(ctx.builder.CreateStore(promotion_point, strct)); + promotion_point = ai.decorateInst(ctx.builder.CreateMemSet(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), + jl_datatype_size(ty), MaybeAlign(jl_datatype_align(ty)))); } ctx.builder.restoreIP(savedIP); } diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 0ec88c9d56356..a9e1b1e02da42 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -646,14 +646,9 @@ void Optimizer::initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, return; assert(!buff->isArrayAllocation()); Type *T = buff->getAllocatedType(); - Value *Init = UndefValue::get(T); - if ((allockind & AllocFnKind::Zeroed) != AllocFnKind::Unknown) - Init = Constant::getNullValue(T); // zero, as described - else if (allockind == AllocFnKind::Unknown) - Init = Constant::getNullValue(T); // assume zeroed since we didn't find the attribute - else - Init = prolog_builder.CreateFreeze(UndefValue::get(T)); // assume freeze, since LLVM does not natively support this case - prolog_builder.CreateStore(Init, buff); + const DataLayout &DL = F.getParent()->getDataLayout(); + prolog_builder.CreateMemSet(buff, ConstantInt::get(Type::getInt8Ty(prolog_builder.getContext()), 0), DL.getTypeAllocSize(T), buff->getAlign()); + } // This function should not erase any safepoint so that the lifetime marker can find and cache diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index b962157120456..665687e86835d 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -76,7 +76,7 @@ L3: ; preds = %L2, %L1, %0 ; CHECK-LABEL: @legal_int_types ; CHECK: alloca [12 x i8] ; CHECK-NOT: alloca i96 -; CHECK: store [12 x i8] zeroinitializer, +; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %var1, ; CHECK: ret void define void @legal_int_types() { %pgcstack = call ptr @julia.get_pgcstack() @@ -140,7 +140,7 @@ L2: ; preds = %0 ; CHECK: alloca ; CHECK-NOT: call token(...) @llvm.julia.gc_preserve_begin ; CHECK: call void @llvm.lifetime.start -; CHECK: store [8 x i8] zeroinitializer, +; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %v, ; CHECK-NOT: call void @llvm.lifetime.end define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret({}) %0) { %pgcstack = call ptr @julia.get_pgcstack() @@ -164,11 +164,8 @@ define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret ; CHECK: alloca [1 x i8] ; CHECK-DAG: alloca [2 x i8] ; CHECK-DAG: alloca [3 x i8] -; CHECK-DAG: freeze [1 x i8] undef -; CHECK-DAG: store [1 x i8] % -; CHECK-DAG: store [3 x i8] zeroinitializer, -; CHECK-NOT: store -; CHECK-NOT: zeroinitializer +; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 1 %var1, +; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 4 %var7, ; CHECK: ret void define void @initializers() { %pgcstack = call ptr @julia.get_pgcstack() From 1157c6f9c400d2409bd27225e252203800c46bbf Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Fri, 18 Oct 2024 15:13:33 -0400 Subject: [PATCH 09/47] fix reporting of precompile configs on CI (#56232) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the header doesn't print for `Pkg.test` with coverage on ``` [8dfed614] Test v1.11.0 1077.2 ms ✓ RequiredInterfaces 1 dependency successfully precompiled in 1 seconds. 8 already precompiled. ``` --- base/precompilation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/precompilation.jl b/base/precompilation.jl index a39563178632f..359d2b61800b1 100644 --- a/base/precompilation.jl +++ b/base/precompilation.jl @@ -800,8 +800,8 @@ function precompilepkgs(pkgs::Vector{String}=String[]; name *= color_string(" $(config_str)", :light_black) end lock(print_lock) do - if !fancyprint && target === nothing && isempty(pkg_queue) - printpkgstyle(io, :Precompiling, "packages...") + if !fancyprint && isempty(pkg_queue) + printpkgstyle(io, :Precompiling, something(target, "packages...")) end end push!(pkg_queue, pkg_config) From fc40e629b1d2ddc94ad9ecb87bc308b2892044e5 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 18 Oct 2024 15:33:29 -0400 Subject: [PATCH 10/47] stream: fix reading LibuvStream into array (#56092) Adds a new internal function `_take!(dst::Array{T,N}, src::Array{T,N})` for doing an efficient `copyto!` equivalent. Previously it was assumed that `compact` did this automatically, which wasn't a great assumption. Fixes #56078 --- base/array.jl | 11 +++++++++++ base/stream.jl | 4 +++- test/read.jl | 22 +++++++++++++++------- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/base/array.jl b/base/array.jl index a628c1212659d..40907b2b00317 100644 --- a/base/array.jl +++ b/base/array.jl @@ -355,6 +355,17 @@ copy return $(Expr(:new, :(typeof(a)), :(memoryref(newmem)), :(a.size))) end +# a mutating version of copyto! that results in dst aliasing src afterwards +function _take!(dst::Array{T,N}, src::Array{T,N}) where {T,N} + if getfield(dst, :ref) !== getfield(src, :ref) + setfield!(dst, :ref, getfield(src, :ref)) + end + if getfield(dst, :size) !== getfield(src, :size) + setfield!(dst, :size, getfield(src, :size)) + end + return dst +end + ## Constructors ## similar(a::Array{T,1}) where {T} = Vector{T}(undef, size(a,1)) diff --git a/base/stream.jl b/base/stream.jl index 3ca5717be29db..2f00538ad0e96 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -941,6 +941,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) if bytesavailable(sbuf) >= nb nread = readbytes!(sbuf, a, nb) else + initsize = length(a) newbuf = PipeBuffer(a, maxsize=nb) newbuf.size = newbuf.offset # reset the write pointer to the beginning nread = try @@ -951,7 +952,8 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) finally s.buffer = sbuf end - compact(newbuf) + _take!(a, _unsafe_take!(newbuf)) + length(a) >= initsize || resize!(a, initsize) end iolock_end() return nread diff --git a/test/read.jl b/test/read.jl index 34224c146864e..99903d92d270f 100644 --- a/test/read.jl +++ b/test/read.jl @@ -268,13 +268,27 @@ for (name, f) in l n2 = readbytes!(s2, a2) @test n1 == n2 @test length(a1) == length(a2) - @test a1[1:n1] == a2[1:n2] + let l = min(l, n) + @test a1[1:l] == a2[1:l] + end @test n <= length(text) || eof(s1) @test n <= length(text) || eof(s2) cleanup() end + # Test growing output array + let x = UInt8[], + io = io() + n = readbytes!(io, x) + @test n == 0 + @test isempty(x) + n = readbytes!(io, x, typemax(Int)) + @test n == length(x) + @test x == codeunits(text) + cleanup() + end + verbose && println("$name read!...") l = length(text) for n = [1, 2, l-2, l-1, l] @@ -477,12 +491,6 @@ let s = "qwerty" @test read(IOBuffer(s)) == codeunits(s) @test read(IOBuffer(s), 10) == codeunits(s) @test read(IOBuffer(s), 1) == codeunits(s)[1:1] - - # Test growing output array - x = UInt8[] - n = readbytes!(IOBuffer(s), x, 10) - @test x == codeunits(s) - @test n == length(x) end From 82b150645e318bcb6bcb450e945b3b689a1eb264 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 18 Oct 2024 15:37:36 -0400 Subject: [PATCH 11/47] fix precompile process flag propagation (#56214) CacheFlags could get set, but were never propagated to the target process, so the result would be unusable. Additionally, the debug and optimization levels were not synchronized with the sysimg, causing a regression in pkgimage usability after moving out stdlibs. Fixes #56207 Fixes #56054 Fixes #56206 --- base/Base.jl | 2 +- base/loading.jl | 61 ++++++++++++++++++++++++++++------------ base/precompilation.jl | 63 +++++++++++++++++++++++++++--------------- base/show.jl | 5 +++- base/util.jl | 3 +- base/uuid.jl | 2 ++ pkgimage.mk | 3 +- src/staticdata_utils.c | 15 +++++----- test/loading.jl | 22 +++++++-------- 9 files changed, 112 insertions(+), 64 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 1e780bb15141a..9800462f855f9 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -533,6 +533,7 @@ include("deepcopy.jl") include("download.jl") include("summarysize.jl") include("errorshow.jl") +include("util.jl") include("initdefs.jl") Filesystem.__postinit__() @@ -549,7 +550,6 @@ include("loading.jl") # misc useful functions & macros include("timing.jl") -include("util.jl") include("client.jl") include("asyncmap.jl") diff --git a/base/loading.jl b/base/loading.jl index 49d0cb52cd37b..78c584d00852b 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1683,6 +1683,8 @@ function CacheFlags(cf::CacheFlags=CacheFlags(ccall(:jl_cache_flags, UInt8, ())) opt_level === nothing ? cf.opt_level : opt_level ) end +# reflecting jloptions.c defaults +const DefaultCacheFlags = CacheFlags(use_pkgimages=true, debug_level=isdebugbuild() ? 2 : 1, check_bounds=0, inline=true, opt_level=2) function _cacheflag_to_uint8(cf::CacheFlags)::UInt8 f = UInt8(0) @@ -1694,12 +1696,29 @@ function _cacheflag_to_uint8(cf::CacheFlags)::UInt8 return f end +function translate_cache_flags(cacheflags::CacheFlags, defaultflags::CacheFlags) + opts = String[] + cacheflags.use_pkgimages != defaultflags.use_pkgimages && push!(opts, cacheflags.use_pkgimages ? "--pkgimages=yes" : "--pkgimages=no") + cacheflags.debug_level != defaultflags.debug_level && push!(opts, "-g$(cacheflags.debug_level)") + cacheflags.check_bounds != defaultflags.check_bounds && push!(opts, ("--check-bounds=auto", "--check-bounds=yes", "--check-bounds=no")[cacheflags.check_bounds + 1]) + cacheflags.inline != defaultflags.inline && push!(opts, cacheflags.inline ? "--inline=yes" : "--inline=no") + cacheflags.opt_level != defaultflags.opt_level && push!(opts, "-O$(cacheflags.opt_level)") + return opts +end + function show(io::IO, cf::CacheFlags) - print(io, "use_pkgimages = ", cf.use_pkgimages) - print(io, ", debug_level = ", cf.debug_level) - print(io, ", check_bounds = ", cf.check_bounds) - print(io, ", inline = ", cf.inline) - print(io, ", opt_level = ", cf.opt_level) + print(io, "CacheFlags(") + print(io, "; use_pkgimages=") + print(io, cf.use_pkgimages) + print(io, ", debug_level=") + print(io, cf.debug_level) + print(io, ", check_bounds=") + print(io, cf.check_bounds) + print(io, ", inline=") + print(io, cf.inline) + print(io, ", opt_level=") + print(io, cf.opt_level) + print(io, ")") end struct ImageTarget @@ -2953,7 +2972,8 @@ end const PRECOMPILE_TRACE_COMPILE = Ref{String}() function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String}, - concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout, isext::Bool=false) + concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(), + internal_stderr::IO = stderr, internal_stdout::IO = stdout, isext::Bool=false) @nospecialize internal_stderr internal_stdout rm(output, force=true) # Remove file if it exists output_o === nothing || rm(output_o, force=true) @@ -2996,24 +3016,29 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o:: deps = deps_eltype * "[" * join(deps_strs, ",") * "]" precomp_stack = "Base.PkgId[$(join(map(pkg_str, vcat(Base.precompilation_stack, pkg)), ", "))]" + if output_o === nothing + # remove options that make no difference given the other cache options + cacheflags = CacheFlags(cacheflags, opt_level=0) + end + opts = translate_cache_flags(cacheflags, CacheFlags()) # julia_cmd is generated for the running system, and must be fixed if running for precompile instead if output_o !== nothing @debug "Generating object cache file for $(repr("text/plain", pkg))" cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) - opts = `--output-o $(output_o) --output-ji $(output) --output-incremental=yes` + push!(opts, "--output-o", output_o) else @debug "Generating cache file for $(repr("text/plain", pkg))" cpu_target = nothing - opts = `-O0 --output-ji $(output) --output-incremental=yes` end + push!(opts, "--output-ji", output) + isassigned(PRECOMPILE_TRACE_COMPILE) && push!(opts, "--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])") - trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[]) --trace-compile-timing` : `` io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd) - $(flags) - $(opts) - --startup-file=no --history-file=no --warn-overwrite=yes - --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no") - $trace - -`, + $(flags) + $(opts) + --output-incremental=yes + --startup-file=no --history-file=no --warn-overwrite=yes + $(have_color === nothing ? "--color=auto" : have_color ? "--color=yes" : "--color=no") + -`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1), stderr = internal_stderr, stdout = internal_stdout), @@ -3131,7 +3156,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in close(tmpio_o) close(tmpio_so) end - p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout, isext) + p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, cacheflags, internal_stderr, internal_stdout, isext) if success(p) if cache_objects @@ -4136,5 +4161,5 @@ end precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing)) || @assert false precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) || @assert false -precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), Cmd, IO, IO)) || @assert false -precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), Cmd, IO, IO)) || @assert false +precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false +precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false diff --git a/base/precompilation.jl b/base/precompilation.jl index 359d2b61800b1..f597acef9b57f 100644 --- a/base/precompilation.jl +++ b/base/precompilation.jl @@ -362,7 +362,7 @@ function printpkgstyle(io, header, msg; color=:green) end const Config = Pair{Cmd, Base.CacheFlags} -const PkgConfig = Tuple{Base.PkgId,Config} +const PkgConfig = Tuple{PkgId,Config} function precompilepkgs(pkgs::Vector{String}=String[]; internal_call::Bool=false, @@ -375,8 +375,22 @@ function precompilepkgs(pkgs::Vector{String}=String[]; # asking for timing disables fancy mode, as timing is shown in non-fancy mode fancyprint::Bool = can_fancyprint(io) && !timing, manifest::Bool=false,) + # monomorphize this to avoid latency problems + _precompilepkgs(pkgs, internal_call, strict, warn_loaded, timing, _from_loading, + configs isa Vector{Config} ? configs : [configs], + IOContext{IO}(io), fancyprint, manifest) +end - configs = configs isa Config ? [configs] : configs +function _precompilepkgs(pkgs::Vector{String}, + internal_call::Bool, + strict::Bool, + warn_loaded::Bool, + timing::Bool, + _from_loading::Bool, + configs::Vector{Config}, + io::IOContext{IO}, + fancyprint::Bool, + manifest::Bool) requested_pkgs = copy(pkgs) # for understanding user intent time_start = time_ns() @@ -393,17 +407,32 @@ function precompilepkgs(pkgs::Vector{String}=String[]; if _from_loading && !Sys.isinteractive() && Base.get_bool_env("JULIA_TESTS", false) # suppress passive loading printing in julia test suite. `JULIA_TESTS` is set in Base.runtests - io = devnull + io = IOContext{IO}(devnull) end + nconfigs = length(configs) hascolor = get(io, :color, false)::Bool color_string(cstr::String, col::Union{Int64, Symbol}) = _color_string(cstr, col, hascolor) stale_cache = Dict{StaleCacheKey, Bool}() - exts = Dict{Base.PkgId, String}() # ext -> parent + exts = Dict{PkgId, String}() # ext -> parent # make a flat map of each dep and its direct deps - depsmap = Dict{Base.PkgId, Vector{Base.PkgId}}() - pkg_exts_map = Dict{Base.PkgId, Vector{Base.PkgId}}() + depsmap = Dict{PkgId, Vector{PkgId}}() + pkg_exts_map = Dict{PkgId, Vector{PkgId}}() + + function describe_pkg(pkg::PkgId, is_direct_dep::Bool, flags::Cmd, cacheflags::Base.CacheFlags) + name = haskey(exts, pkg) ? string(exts[pkg], " → ", pkg.name) : pkg.name + name = is_direct_dep ? name : color_string(name, :light_black) + if nconfigs > 1 && !isempty(flags) + config_str = join(flags, " ") + name *= color_string(" `$config_str`", :light_black) + end + if nconfigs > 1 + config_str = join(Base.translate_cache_flags(cacheflags, Base.DefaultCacheFlags), " ") + name *= color_string(" $config_str", :light_black) + end + return name + end for (dep, deps) in env.deps pkg = Base.PkgId(dep, env.names[dep]) @@ -569,7 +598,6 @@ function precompilepkgs(pkgs::Vector{String}=String[]; end end - nconfigs = length(configs) target = nothing if nconfigs == 1 if !isempty(only(configs)[1]) @@ -584,7 +612,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; failed_deps = Dict{PkgConfig, String}() precomperr_deps = PkgConfig[] # packages that may succeed after a restart (i.e. loaded packages with no cache file) - print_lock = io isa Base.LibuvStream ? io.lock::ReentrantLock : ReentrantLock() + print_lock = io.io isa Base.LibuvStream ? io.io.lock::ReentrantLock : ReentrantLock() first_started = Base.Event() printloop_should_exit::Bool = !fancyprint # exit print loop immediately if not fancy printing interrupted_or_done = Base.Event() @@ -677,7 +705,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; n_print_rows = 0 while !printloop_should_exit lock(print_lock) do - term_size = Base.displaysize_(io) + term_size = displaysize(io) num_deps_show = max(term_size[1] - 3, 2) # show at least 2 deps pkg_queue_show = if !interrupted_or_done.set && length(pkg_queue) > num_deps_show last(pkg_queue, num_deps_show) @@ -692,7 +720,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; bar.max = n_total - n_already_precomp # when sizing to the terminal width subtract a little to give some tolerance to resizing the # window between print cycles - termwidth = Base.displaysize_(io)[2] - 4 + termwidth = displaysize(io)[2] - 4 if !final_loop str = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io) print(iostr, Base._truncate_at_width_or_chars(true, str, termwidth), "\n") @@ -700,12 +728,8 @@ function precompilepkgs(pkgs::Vector{String}=String[]; for pkg_config in pkg_queue_show dep, config = pkg_config loaded = warn_loaded && haskey(Base.loaded_modules, dep) - _name = haskey(exts, dep) ? string(exts[dep], " → ", dep.name) : dep.name - name = dep in direct_deps ? _name : string(color_string(_name, :light_black)) - if nconfigs > 1 && !isempty(config[1]) - config_str = "$(join(config[1], " "))" - name *= color_string(" $(config_str)", :light_black) - end + flags, cacheflags = config + name = describe_pkg(dep, dep in direct_deps, flags, cacheflags) line = if pkg_config in precomperr_deps string(color_string(" ? ", Base.warn_color()), name) elseif haskey(failed_deps, pkg_config) @@ -793,12 +817,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; std_pipe = Base.link_pipe!(Pipe(); reader_supports_async=true, writer_supports_async=true) t_monitor = @async monitor_std(pkg_config, std_pipe; single_requested_pkg) - _name = haskey(exts, pkg) ? string(exts[pkg], " → ", pkg.name) : pkg.name - name = is_direct_dep ? _name : string(color_string(_name, :light_black)) - if nconfigs > 1 && !isempty(flags) - config_str = "$(join(flags, " "))" - name *= color_string(" $(config_str)", :light_black) - end + name = describe_pkg(pkg, is_direct_dep, flags, cacheflags) lock(print_lock) do if !fancyprint && isempty(pkg_queue) printpkgstyle(io, :Precompiling, something(target, "packages...")) diff --git a/base/show.jl b/base/show.jl index fb932838ac69a..25ed99f50b5b0 100644 --- a/base/show.jl +++ b/base/show.jl @@ -324,8 +324,11 @@ end convert(::Type{IOContext}, io::IOContext) = io convert(::Type{IOContext}, io::IO) = IOContext(io, ioproperties(io))::IOContext +convert(::Type{IOContext{IO_t}}, io::IOContext{IO_t}) where {IO_t} = io +convert(::Type{IOContext{IO_t}}, io::IO) where {IO_t} = IOContext{IO_t}(io, ioproperties(io))::IOContext{IO_t} IOContext(io::IO) = convert(IOContext, io) +IOContext{IO_t}(io::IO) where {IO_t} = convert(IOContext{IO_t}, io) function IOContext(io::IO, KV::Pair) d = ioproperties(io) @@ -427,7 +430,7 @@ get(io::IO, key, default) = default keys(io::IOContext) = keys(io.dict) keys(io::IO) = keys(ImmutableDict{Symbol,Any}()) -displaysize(io::IOContext) = haskey(io, :displaysize) ? io[:displaysize]::Tuple{Int,Int} : Base.displaysize_(io.io) +displaysize(io::IOContext) = haskey(io, :displaysize) ? io[:displaysize]::Tuple{Int,Int} : displaysize(io.io) show_circular(io::IO, @nospecialize(x)) = false function show_circular(io::IOContext, @nospecialize(x)) diff --git a/base/util.jl b/base/util.jl index 95d62c4a16e1d..3ce64e50f7e29 100644 --- a/base/util.jl +++ b/base/util.jl @@ -249,7 +249,7 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio end function julia_exename() - if !Base.isdebugbuild() + if !isdebugbuild() return @static Sys.iswindows() ? "julia.exe" : "julia" else return @static Sys.iswindows() ? "julia-debug.exe" : "julia-debug" @@ -530,7 +530,6 @@ function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) end _crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc) _crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc) -_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) _crc32c(x::UInt128, crc::UInt32=0x00000000) = ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, x, 16) _crc32c(x::UInt64, crc::UInt32=0x00000000) = diff --git a/base/uuid.jl b/base/uuid.jl index 9b2da3c6409db..56f3a6aa417e7 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -36,6 +36,8 @@ let Base.hash(uuid::UUID, h::UInt) = hash(uuid_hash_seed, hash(convert(NTuple{2, UInt64}, uuid), h)) end +_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) + let @inline function uuid_kernel(s, i, u) _c = UInt32(@inbounds codeunit(s, i)) diff --git a/pkgimage.mk b/pkgimage.mk index 0bc035ee03b08..78b2618be549f 100644 --- a/pkgimage.mk +++ b/pkgimage.mk @@ -25,7 +25,8 @@ print-depot-path: @$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e '@show Base.DEPOT_PATH') $(BUILDDIR)/stdlib/%.image: $(JULIAHOME)/stdlib/Project.toml $(JULIAHOME)/stdlib/Manifest.toml $(INDEPENDENT_STDLIBS_SRCS) $(JULIA_DEPOT_PATH)/compiled - @$(call PRINT_JULIA, JULIA_CPU_TARGET="$(JULIA_CPU_TARGET)" $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.Precompilation.precompilepkgs(;configs=[``=>Base.CacheFlags(), `--check-bounds=yes`=>Base.CacheFlags(;check_bounds=1)])') + @$(call PRINT_JULIA, JULIA_CPU_TARGET="$(JULIA_CPU_TARGET)" $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e \ + 'Base.Precompilation.precompilepkgs(configs=[``=>Base.CacheFlags(debug_level=2, opt_level=3), ``=>Base.CacheFlags(check_bounds=1, debug_level=2, opt_level=3)])') touch $@ $(BUILDDIR)/stdlib/release.image: $(build_private_libdir)/sys.$(SHLIB_EXT) diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 5f1095fec9168..9a7653972ea7c 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -605,15 +605,15 @@ static void write_mod_list(ios_t *s, jl_array_t *a) write_int32(s, 0); } -// OPT_LEVEL should always be the upper bits #define OPT_LEVEL 6 +#define DEBUG_LEVEL 1 JL_DLLEXPORT uint8_t jl_cache_flags(void) { // OOICCDDP uint8_t flags = 0; flags |= (jl_options.use_pkgimages & 1); // 0-bit - flags |= (jl_options.debug_level & 3) << 1; // 1-2 bit + flags |= (jl_options.debug_level & 3) << DEBUG_LEVEL; // 1-2 bit flags |= (jl_options.check_bounds & 3) << 3; // 3-4 bit flags |= (jl_options.can_inline & 1) << 5; // 5-bit flags |= (jl_options.opt_level & 3) << OPT_LEVEL; // 6-7 bit @@ -636,14 +636,13 @@ JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t requested_flags, uint8_t actua actual_flags &= ~1; } - // 2. Check all flags, except opt level must be exact - uint8_t mask = (1 << OPT_LEVEL)-1; + // 2. Check all flags, except opt level and debug level must be exact + uint8_t mask = (~(3u << OPT_LEVEL) & ~(3u << DEBUG_LEVEL)) & 0x7f; if ((actual_flags & mask) != (requested_flags & mask)) return 0; - // 3. allow for higher optimization flags in cache - actual_flags >>= OPT_LEVEL; - requested_flags >>= OPT_LEVEL; - return actual_flags >= requested_flags; + // 3. allow for higher optimization and debug level flags in cache to minimize required compile option combinations + return ((actual_flags >> OPT_LEVEL) & 3) >= ((requested_flags >> OPT_LEVEL) & 3) && + ((actual_flags >> DEBUG_LEVEL) & 3) >= ((requested_flags >> DEBUG_LEVEL) & 3); } JL_DLLEXPORT uint8_t jl_match_cache_flags_current(uint8_t flags) diff --git a/test/loading.jl b/test/loading.jl index 9e7e40ff3b50a..ec4a0391a412a 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1225,10 +1225,7 @@ end @test cf.check_bounds == 3 @test cf.inline @test cf.opt_level == 3 - - io = PipeBuffer() - show(io, cf) - @test read(io, String) == "use_pkgimages = true, debug_level = 3, check_bounds = 3, inline = true, opt_level = 3" + @test repr(cf) == "CacheFlags(; use_pkgimages=true, debug_level=3, check_bounds=3, inline=true, opt_level=3)" end empty!(Base.DEPOT_PATH) @@ -1420,13 +1417,16 @@ end "JULIA_DEPOT_PATH" => depot_path, "JULIA_DEBUG" => "loading") - out = Pipe() - proc = run(pipeline(cmd, stdout=out, stderr=out)) - close(out.in) - - log = @async String(read(out)) - @test success(proc) - fetch(log) + out = Base.PipeEndpoint() + log = @async read(out, String) + try + proc = run(pipeline(cmd, stdout=out, stderr=out)) + @test success(proc) + catch + @show fetch(log) + rethrow() + end + return fetch(log) end log = load_package("Parent", `--compiled-modules=no --pkgimages=no`) From f36f34298e5154b14fea1ec680b4e6c369b61d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Fri, 18 Oct 2024 22:42:50 +0100 Subject: [PATCH 12/47] Do not call `rand` during sysimage precompilation (#56227) This change by itself doesn't do anything significant on `master`, but when backported to the v1.11 branch it'll address #56177. However it'd be great if someone could tell _why_ this fixes that issue, because it looks very unrelated. --------- Co-authored-by: Ian Butterworth --- contrib/generate_precompile.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 04d13011d6223..037e8926d5003 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -183,10 +183,10 @@ for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter()) # interactive startup uses this write(IOBuffer(), "") - # not critical, but helps hide unrelated compilation from @time when using --trace-compile - foo() = rand(2,2) * rand(2,2) - @time foo() - @time foo() + # Not critical, but helps hide unrelated compilation from @time when using --trace-compile. + f55729() = Base.Experimental.@force_compile + @time @eval f55729() + @time @eval f55729() break # only actually need to do this once end From cd99cfc4d39c09b3edbb6040639b4baa47882f6e Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 18 Oct 2024 22:03:58 -0400 Subject: [PATCH 13/47] Split up the one big codegen lock into per-function locks and dependency edge tracking (#56179) Disjoint content can be LLVM optimized in parallel now, since codegen no longer has any ability to handle recursion, and compilation should even be able to run in parallel with the GC also. Removes any remaining global state, since that is unsafe. Adds a C++ shim for concurrent gc support in conjunction with using a `std::unique_lock` to DRY code. Fix RuntimeDyld implementation: Since we use the ForwardingMemoryManger instead of making a new RTDyldMemoryManager object every time, we need to reference count the finalizeMemory calls so that we only call that at the end of relocating everything when everything is ready. We already happen to conveniently have a shared_ptr here, so just use that instead of inventing a duplicate counter. Fixes many OC bugs, including mostly fixing #55035, since this bug is just that much harder to express in the more constrained API. --- src/aotcompile.cpp | 164 ++++- src/cgmemmgr.cpp | 128 ++-- src/clangsa/GCChecker.cpp | 11 +- src/codegen.cpp | 528 ++++++---------- src/debug-registry.h | 14 +- src/debuginfo.cpp | 31 +- src/engine.cpp | 87 ++- src/gf.c | 17 + src/jitlayers.cpp | 1216 +++++++++++++++++++++++-------------- src/jitlayers.h | 94 +-- src/julia.h | 12 +- src/julia_atomics.h | 4 +- src/julia_internal.h | 7 +- src/julia_locks.h | 27 + src/opaque_closure.c | 11 +- src/pipeline.cpp | 6 + src/stackwalk.c | 4 +- 17 files changed, 1336 insertions(+), 1025 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 279686c387e1b..a3ffdf1d051a9 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ jl_value_t *ci = cgparams.lookup(mi, world, world); JL_GC_PROMISE_ROOTED(ci); jl_code_instance_t *codeinst = NULL; - JL_GC_PUSH1(&codeinst); if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) { codeinst = (jl_code_instance_t*)ci; } else { if (cgparams.lookup != jl_rettype_inferred_addr) { + // XXX: This will corrupt and leak a lot of memory which may be very bad jl_error("Refusing to automatically run type inference with custom cache lookup."); } else { @@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ * it into the cache here, since it was explicitly requested and is * otherwise not reachable from anywhere in the system image. */ - if (!jl_mi_cache_has_ci(mi, codeinst)) + if (codeinst && !jl_mi_cache_has_ci(mi, codeinst)) { + JL_GC_PUSH1(&codeinst); jl_mi_cache_insert(mi, codeinst); + JL_GC_POP(); + } } } - JL_GC_POP(); return codeinst; } -arraylist_t new_invokes; +typedef DenseMap> jl_compiled_functions_t; +static void compile_workqueue(jl_codegen_params_t ¶ms, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions) +{ + decltype(params.workqueue) workqueue; + std::swap(params.workqueue, workqueue); + jl_code_info_t *src = NULL; + jl_code_instance_t *codeinst = NULL; + JL_GC_PUSH2(&src, &codeinst); + assert(!params.cache); + while (!workqueue.empty()) { + auto it = workqueue.pop_back_val(); + codeinst = it.first; + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + { + auto it = compiled_functions.find(codeinst); + if (it == compiled_functions.end()) { + // Reinfer the function. The JIT came along and removed the inferred + // method body. See #34993 + if ((policy != CompilationPolicy::Default || params.params->trim) && + jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { + // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary) + codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE); + } + if (codeinst) { + orc::ThreadSafeModule result_m = + jl_create_ts_module(name_from_method_instance(codeinst->def), + params.tsctx, params.DL, params.TargetTriple); + auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params); + if (result_m) + it = compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; + } + } + if (it != compiled_functions.end()) { + auto &decls = it->second.second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + } + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + } + } + } + // patch up the prototype we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invokeName.empty() && params.params->trim) { + errs() << "Bailed out to invoke when compiling:"; + jl_(codeinst->def); + abort(); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + jl_method_instance_t *mi = codeinst->def; + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + // TODO: maybe this can be cached in codeinst->specfptr? + emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); + } + } + if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") + ocinvokeDecl = pinvoke->getName(); + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); + } + } + workqueue.append(params.workqueue); + params.workqueue.clear(); + } + JL_GC_POP(); +} + + // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing @@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm orc::ThreadSafeContext ctx; orc::ThreadSafeModule backing; if (!llvmmod) { - ctx = jl_ExecutionEngine->acquireContext(); + ctx = jl_ExecutionEngine->makeContext(); backing = jl_create_ts_module("text", ctx); } orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing; @@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm params.imaging_mode = imaging; params.debug_level = cgparams->debug_info_level; params.external_linkage = _external_linkage; - arraylist_new(&new_invokes, 0); size_t compile_for[] = { jl_typeinf_world, _world }; int worlds = 0; if (jl_options.trim != JL_TRIM_NO) worlds = 1; + jl_compiled_functions_t compiled_functions; for (; worlds < 2; worlds++) { JL_TIMING(NATIVE_AOT, NATIVE_Codegen); size_t this_world = compile_for[worlds]; @@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm continue; } mi = (jl_method_instance_t*)item; -compile_mi: src = NULL; // if this method is generally visible to the current compilation world, // and this is either the primary world, or not applicable in the primary world @@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_(mi); abort(); } - if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) { + if (codeinst && !compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) { // now add it to our compilation results // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) { @@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm Triple(clone.getModuleUnlocked()->getTargetTriple())); jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params); if (result_m) - params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; else if (jl_options.trim != JL_TRIM_NO) { // if we're building a small image, we need to compile everything // to ensure that we have all the information we need. @@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } } } - } else if (this_world != jl_typeinf_world) { + } + else if (this_world != jl_typeinf_world) { /* jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n"); jl_(mi); abort(); */ } - // TODO: is goto the best way to do this? - jl_compile_workqueue(params, policy); - mi = (jl_method_instance_t*)arraylist_pop(&new_invokes); - if (mi != NULL) { - goto compile_mi; - } } - - // finally, make sure all referenced methods also get compiled or fixed up - jl_compile_workqueue(params, policy); } JL_GC_POP(); - arraylist_free(&new_invokes); + // finally, make sure all referenced methods also get compiled or fixed up + compile_workqueue(params, policy, compiled_functions); // process the globals array, before jl_merge_module destroys them SmallVector gvars(params.global_targets.size()); @@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm data->jl_value_to_llvm[idx] = global.first; idx++; } - CreateNativeMethods += params.compiled_functions.size(); + CreateNativeMethods += compiled_functions.size(); size_t offset = gvars.size(); data->jl_external_to_llvm.resize(params.external_fns.size()); @@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm { JL_TIMING(NATIVE_AOT, NATIVE_Merge); Linker L(*clone.getModuleUnlocked()); - for (auto &def : params.compiled_functions) { + for (auto &def : compiled_functions) { jl_merge_module(clone, std::move(std::get<0>(def.second))); jl_code_instance_t *this_code = def.first; jl_llvm_functions_t decls = std::get<1>(def.second); @@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } ct->reentrant_timing &= ~1ull; } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return (void*)data; } @@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code, } } -void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) -{ - PM->add(new TargetLibraryInfoWrapperPass(triple)); - PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); -} // sometimes in GDB you want to find out what code would be created from a mi extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi) @@ -2037,8 +2135,8 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ dump->F = nullptr; dump->TSM = nullptr; if (src && jl_is_code_info(src)) { - auto ctx = jl_ExecutionEngine->getContext(); - orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx); + auto ctx = jl_ExecutionEngine->makeContext(); + orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx); uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -2046,7 +2144,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ auto target_info = m.withModuleDo([&](Module &M) { return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); }); - jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second)); + jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second)); output.params = ¶ms; output.imaging_mode = imaging_default(); // This would be nice, but currently it causes some assembly regressions that make printed output diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp index 8557698a4e513..c257d2a2e3331 100644 --- a/src/cgmemmgr.cpp +++ b/src/cgmemmgr.cpp @@ -32,14 +32,14 @@ namespace { -static size_t get_block_size(size_t size) +static size_t get_block_size(size_t size) JL_NOTSAFEPOINT { return (size > jl_page_size * 256 ? LLT_ALIGN(size, jl_page_size) : jl_page_size * 256); } // Wrapper function to mmap/munmap/mprotect pages... -static void *map_anon_page(size_t size) +static void *map_anon_page(size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size, @@ -54,7 +54,7 @@ static void *map_anon_page(size_t size) return mem; } -static void unmap_page(void *ptr, size_t size) +static void unmap_page(void *ptr, size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ VirtualFree(ptr, size, MEM_DECOMMIT); @@ -71,7 +71,7 @@ enum class Prot : int { NO = PAGE_NOACCESS }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { DWORD old_prot; if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) { @@ -89,7 +89,7 @@ enum class Prot : int { NO = PROT_NONE }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { int ret = mprotect(ptr, size, (int)flags); if (ret != 0) { @@ -98,7 +98,7 @@ static void protect_page(void *ptr, size_t size, Prot flags) } } -static bool check_fd_or_close(int fd) +static bool check_fd_or_close(int fd) JL_NOTSAFEPOINT { if (fd == -1) return false; @@ -129,7 +129,7 @@ static intptr_t anon_hdl = -1; // Also, creating big file mapping and then map pieces of it seems to // consume too much global resources. Therefore, we use each file mapping // as a block on windows -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS, 0, 0, size); @@ -137,13 +137,13 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = 0; return 0; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); DWORD file_mode = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; @@ -162,7 +162,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) } #else // _OS_WINDOWS_ // For shared mapped region -static intptr_t get_anon_hdl(void) +static intptr_t get_anon_hdl(void) JL_NOTSAFEPOINT { int fd = -1; @@ -228,7 +228,7 @@ static struct _make_shared_map_lock { }; } shared_map_lock; -static size_t get_map_size_inc() +static size_t get_map_size_inc() JL_NOTSAFEPOINT { rlimit rl; if (getrlimit(RLIMIT_FSIZE, &rl) != -1) { @@ -242,7 +242,7 @@ static size_t get_map_size_inc() return map_size_inc_default; } -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, anon_hdl, id); @@ -250,7 +250,7 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = get_anon_hdl(); if (anon_hdl == -1) @@ -265,7 +265,7 @@ static intptr_t init_shared_map() return anon_hdl; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); size_t off = jl_atomic_fetch_add(&map_offset, size); @@ -292,7 +292,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) #ifdef _OS_LINUX_ // Using `/proc/self/mem`, A.K.A. Keno's remote memory manager. -ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) +ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) JL_NOTSAFEPOINT { static_assert(sizeof(off_t) >= 8, "off_t is smaller than 64bits"); #ifdef _P64 @@ -319,7 +319,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) // Do not call this directly. // Use `get_self_mem_fd` which has a guard to call this only once. -static int _init_self_mem() +static int _init_self_mem() JL_NOTSAFEPOINT { struct utsname kernel; uname(&kernel); @@ -359,13 +359,13 @@ static int _init_self_mem() return fd; } -static int get_self_mem_fd() +static int get_self_mem_fd() JL_NOTSAFEPOINT { static int fd = _init_self_mem(); return fd; } -static void write_self_mem(void *dest, void *ptr, size_t size) +static void write_self_mem(void *dest, void *ptr, size_t size) JL_NOTSAFEPOINT { while (size > 0) { ssize_t ret = pwrite_addr(get_self_mem_fd(), ptr, size, (uintptr_t)dest); @@ -424,7 +424,7 @@ struct Block { Block(const Block&) = delete; Block &operator=(const Block&) = delete; - Block(Block &&other) + Block(Block &&other) JL_NOTSAFEPOINT : ptr(other.ptr), total(other.total), avail(other.avail) @@ -433,9 +433,9 @@ struct Block { other.total = other.avail = 0; } - Block() = default; + Block() JL_NOTSAFEPOINT = default; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t aligned_avail = avail & (-align); if (aligned_avail < size) @@ -444,7 +444,7 @@ struct Block { avail = aligned_avail - size; return p; } - void reset(void *addr, size_t size) + void reset(void *addr, size_t size) JL_NOTSAFEPOINT { if (avail >= jl_page_size) { uintptr_t end = uintptr_t(ptr) + total; @@ -462,7 +462,8 @@ class RWAllocator { static constexpr int nblocks = 8; Block blocks[nblocks]{}; public: - void *alloc(size_t size, size_t align) + RWAllocator() JL_NOTSAFEPOINT = default; + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -498,9 +499,9 @@ struct SplitPtrBlock : public Block { uintptr_t wr_ptr{0}; uint32_t state{0}; - SplitPtrBlock() = default; + SplitPtrBlock() JL_NOTSAFEPOINT = default; - void swap(SplitPtrBlock &other) + void swap(SplitPtrBlock &other) JL_NOTSAFEPOINT { std::swap(ptr, other.ptr); std::swap(total, other.total); @@ -509,7 +510,7 @@ struct SplitPtrBlock : public Block { std::swap(state, other.state); } - SplitPtrBlock(SplitPtrBlock &&other) + SplitPtrBlock(SplitPtrBlock &&other) JL_NOTSAFEPOINT : SplitPtrBlock() { swap(other); @@ -534,11 +535,12 @@ class ROAllocator { // but might not have all the permissions set or data copied yet. SmallVector completed; virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) = 0; - virtual SplitPtrBlock alloc_block(size_t size) = 0; + size_t size, size_t align) JL_NOTSAFEPOINT = 0; + virtual SplitPtrBlock alloc_block(size_t size) JL_NOTSAFEPOINT = 0; public: - virtual ~ROAllocator() {} - virtual void finalize() + ROAllocator() JL_NOTSAFEPOINT = default; + virtual ~ROAllocator() JL_NOTSAFEPOINT {} + virtual void finalize() JL_NOTSAFEPOINT { for (auto &alloc: allocations) { // ensure the mapped pages are consistent @@ -552,7 +554,7 @@ class ROAllocator { } // Allocations that have not been finalized yet. SmallVector allocations; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -603,7 +605,7 @@ class ROAllocator { template class DualMapAllocator : public ROAllocator { protected: - void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override + void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT { assert((char*)rt_ptr >= block.ptr && (char*)rt_ptr < (block.ptr + block.total)); @@ -618,7 +620,7 @@ class DualMapAllocator : public ROAllocator { } return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr)); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; // use `wr_ptr` to record the id initially @@ -626,7 +628,7 @@ class DualMapAllocator : public ROAllocator { new_block.reset(ptr, size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { // This function handles setting the block to the right mode // and free'ing maps that are not needed anymore. @@ -662,11 +664,11 @@ class DualMapAllocator : public ROAllocator { } } public: - DualMapAllocator() + DualMapAllocator() JL_NOTSAFEPOINT { assert(anon_hdl != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -685,7 +687,7 @@ class SelfMemAllocator : public ROAllocator { SmallVector temp_buff; protected: void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) override + size_t size, size_t align) override JL_NOTSAFEPOINT { assert(!(block.state & SplitPtrBlock::InitAlloc)); for (auto &wr_block: temp_buff) { @@ -699,13 +701,13 @@ class SelfMemAllocator : public ROAllocator { new_block.reset(map_anon_page(block_size), block_size); return new_block.alloc(size, align); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; new_block.reset(map_anon_page(size), size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { if (!(block.state & SplitPtrBlock::Alloc)) return; @@ -718,13 +720,13 @@ class SelfMemAllocator : public ROAllocator { } } public: - SelfMemAllocator() + SelfMemAllocator() JL_NOTSAFEPOINT : ROAllocator(), temp_buff() { assert(get_self_mem_fd() != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -770,17 +772,15 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { RWAllocator rw_alloc; std::unique_ptr> ro_alloc; std::unique_ptr> exe_alloc; - bool code_allocated; size_t total_allocated; public: - RTDyldMemoryManagerJL() + RTDyldMemoryManagerJL() JL_NOTSAFEPOINT : SectionMemoryManager(), pending_eh(), rw_alloc(), ro_alloc(), exe_alloc(), - code_allocated(false), total_allocated(0) { #ifdef _OS_LINUX_ @@ -794,12 +794,12 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { exe_alloc.reset(new DualMapAllocator()); } } - ~RTDyldMemoryManagerJL() override + ~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT { } - size_t getTotalBytes() { return total_allocated; } + size_t getTotalBytes() JL_NOTSAFEPOINT { return total_allocated; } void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override; + size_t Size) override JL_NOTSAFEPOINT; #if 0 // Disable for now since we are not actually using this. void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, @@ -807,16 +807,16 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { #endif uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) override; + StringRef SectionName) override JL_NOTSAFEPOINT; uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) override; + bool isReadOnly) override JL_NOTSAFEPOINT; using SectionMemoryManager::notifyObjectLoaded; void notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) override; - bool finalizeMemory(std::string *ErrMsg = nullptr) override; + const object::ObjectFile &Obj) override JL_NOTSAFEPOINT; + bool finalizeMemory(std::string *ErrMsg = nullptr) override JL_NOTSAFEPOINT; template - void mapAddresses(DL &Dyld, Alloc &&allocator) + void mapAddresses(DL &Dyld, Alloc &&allocator) JL_NOTSAFEPOINT { for (auto &alloc: allocator->allocations) { if (alloc.rt_addr == alloc.wr_addr || alloc.relocated) @@ -826,7 +826,7 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { } } template - void mapAddresses(DL &Dyld) + void mapAddresses(DL &Dyld) JL_NOTSAFEPOINT { if (!ro_alloc) return; @@ -838,14 +838,9 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) + StringRef SectionName) JL_NOTSAFEPOINT { // allocating more than one code section can confuse libunwind. -#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_) - // TODO: Figure out why msan and now asan too need this. - assert(!code_allocated); - code_allocated = true; -#endif total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size); @@ -859,7 +854,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) + bool isReadOnly) JL_NOTSAFEPOINT { total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); @@ -873,7 +868,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, } void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) + const object::ObjectFile &Obj) JL_NOTSAFEPOINT { if (!ro_alloc) { assert(!exe_alloc); @@ -884,9 +879,8 @@ void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, mapAddresses(Dyld); } -bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) +bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) JL_NOTSAFEPOINT { - code_allocated = false; if (ro_alloc) { ro_alloc->finalize(); assert(exe_alloc); @@ -904,7 +898,7 @@ bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { if (uintptr_t(Addr) == LoadAddr) { register_eh_frames(Addr, Size); @@ -917,7 +911,7 @@ void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, #if 0 void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { deregister_eh_frames((uint8_t*)LoadAddr, Size); } @@ -925,12 +919,12 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, } -RTDyldMemoryManager* createRTDyldMemoryManager() +RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT { return new RTDyldMemoryManagerJL(); } -size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) +size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT { return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes(); } diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index 31631eb70a4ad..4892ebdabd110 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -31,7 +31,7 @@ namespace { using namespace clang; using namespace ento; -#define PDP std::shared_ptr +typedef std::shared_ptr PDP; #define MakePDP make_unique static const Stmt *getStmtForDiagnostics(const ExplodedNode *N) @@ -394,13 +394,18 @@ PDP GCChecker::SafepointBugVisitor::VisitNode(const ExplodedNode *N, } else { PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( N->getLocationContext(), BRC.getSourceManager()); - return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + if (Pos.isValid()) + return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + //N->getLocation().dump(); } } else if (NewSafepointDisabled == (unsigned)-1) { PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( N->getLocationContext(), BRC.getSourceManager()); - return MakePDP(Pos, "Safepoints re-enabled here"); + if (Pos.isValid()) + return MakePDP(Pos, "Safepoints re-enabled here"); + //N->getLocation().dump(); } + // n.b. there may be no position here to report if they were disabled by julia_notsafepoint_enter/leave } return nullptr; } diff --git a/src/codegen.cpp b/src/codegen.cpp index 3f69f4789493a..0ab26a65fcaaa 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -233,7 +233,6 @@ STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted"); STATISTIC(EmittedInvokes, "Number of invokes emitted"); STATISTIC(EmittedCalls, "Number of calls emitted"); STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted"); -STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted"); STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted"); STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted"); STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated"); @@ -1009,6 +1008,11 @@ static const auto jlinvoke_func = new JuliaFunction<>{ {AttributeSet(), Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); }, }; +static const auto jlopaque_closure_call_func = new JuliaFunction<>{ + XSTR(jl_f_opaque_closure_call), + get_func_sig, + get_func_attrs, +}; static const auto jlmethod_func = new JuliaFunction<>{ XSTR(jl_method_def), [](LLVMContext &C) { @@ -1606,7 +1610,7 @@ static const auto jltuple_func = new JuliaFunction<>{XSTR(jl_f_tuple), get_func_ static const auto jlintrinsic_func = new JuliaFunction<>{XSTR(jl_f_intrinsic_call), get_func3_sig, get_func_attrs}; static const auto &builtin_func_map() { - static std::map*> builtins = { + static auto builtins = new DenseMap*> { { jl_f_is_addr, new JuliaFunction<>{XSTR(jl_f_is), get_func_sig, get_func_attrs} }, { jl_f_typeof_addr, new JuliaFunction<>{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} }, { jl_f_sizeof_addr, new JuliaFunction<>{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} }, @@ -1649,18 +1653,18 @@ static const auto &builtin_func_map() { { jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }, { jl_f_current_scope_addr, new JuliaFunction<>{XSTR(jl_f_current_scope), get_func_sig, get_func_attrs} }, }; - return builtins; + return *builtins; } static const auto &may_dispatch_builtins() { - static std::unordered_set builtins( + static auto builtins = new DenseSet( {jl_f__apply_iterate_addr, jl_f__apply_pure_addr, jl_f__call_in_world_addr, jl_f__call_in_world_total_addr, jl_f__call_latest_addr, }); - return builtins; + return *builtins; } static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; @@ -2976,7 +2980,7 @@ static void jl_name_jlfuncparams_args(jl_codegen_params_t ¶ms, Function *F) F->getArg(3)->setName("sparams::Any"); } -static void jl_init_function(Function *F, const Triple &TT) +void jl_init_function(Function *F, const Triple &TT) { // set any attributes that *must* be set on all functions AttrBuilder attr(F->getContext()); @@ -3023,6 +3027,7 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, jl_value_t *rettype, if (jl_vararg_kind(jl_tparam(sig, jl_nparams(sig) - 1)) == JL_VARARG_UNBOUND) return false; // not invalid, consider if specialized signature is worthwhile + // n.b. sig is sometimes wrong for OC (tparam0 might be the captures type of the specialization, even though what gets passed in that slot is an OC object), so prefer_specsig is always set (instead of recomputing tparam0 using get_oc_type) if (prefer_specsig) return true; if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type) @@ -5236,7 +5241,15 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i]); + Value *arg; + if (i == 0 && trampoline == julia_call3) { + const jl_cgval_t &f = argv[i]; + arg = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; + arg = decay_derived(ctx, arg); + } + else { + arg = boxed(ctx, argv[i]); + } theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -5283,13 +5296,13 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; } for (size_t i = 0; i < nargs; i++) { - jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to be a datatype by construction for specsig if (is_opaque_closure && i == 0) { // Special implementation for opaque closures: their jt and thus - // julia_type_to_llvm values are likely wrong, so override the - // behavior here to directly pass the expected pointer based instead - // just on passing arg as a pointer + // julia_type_to_llvm values are likely wrong (based on captures instead of the OC), so override the + // behavior here to directly pass the expected pointer directly instead of + // computing it from the available information + // jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(specTypes, jlretty) jl_cgval_t arg = argv[i]; if (arg.isghost) { argvals[idx] = Constant::getNullValue(ctx.builder.getPtrTy(AddressSpace::Derived)); @@ -5302,6 +5315,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; continue; } + jl_value_t *jt = jl_nth_slot_type(specTypes, i); jl_cgval_t arg = update_julia_type(ctx, argv[i], jt); if (arg.typ == jl_bottom_type) return jl_cgval_t(); @@ -5519,6 +5533,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR // Check if we already queued this up auto it = ctx.call_targets.find(codeinst); if (need_to_emit && it != ctx.call_targets.end()) { + assert(it->second.specsig == specsig); protoname = it->second.decl->getName(); need_to_emit = cache_valid = false; } @@ -5559,7 +5574,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR handled = true; if (need_to_emit) { Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); - ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, specsig}; + ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, nullptr, specsig}; if (trim_may_error(ctx.params->trim)) push_frames(ctx, ctx.linfo, mi); } @@ -5570,9 +5585,9 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR if (!handled) { if (trim_may_error(ctx.params->trim)) { if (lival.constant) { - arraylist_push(&new_invokes, lival.constant); push_frames(ctx, ctx.linfo, (jl_method_instance_t*)lival.constant); - } else { + } + else { errs() << "Dynamic call to unknown function"; errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; @@ -5728,20 +5743,16 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo } } FunctionCallee fptr; - Value *F; JuliaFunction<> *cc; if (f.typ == (jl_value_t*)jl_intrinsic_type) { fptr = prepare_call(jlintrinsic_func); - F = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; - F = decay_derived(ctx, F); cc = julia_call3; } else { fptr = FunctionCallee(get_func_sig(ctx.builder.getContext()), ctx.builder.CreateCall(prepare_call(jlgetbuiltinfptr_func), {emit_typeof(ctx, f)})); - F = boxed(ctx, f); cc = julia_call; } - Value *ret = emit_jlcall(ctx, fptr, F, ArrayRef(argv).drop_front(), nargs - 1, cc); + Value *ret = emit_jlcall(ctx, fptr, nullptr, argv, nargs, cc); setName(ctx.emission_context, ret, "Builtin_ret"); return mark_julia_type(ctx, ret, true, rt); } @@ -5758,52 +5769,12 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo JL_GC_POP(); return r; } + // TODO: else emit_oc_call } } int failed_dispatch = !argv[0].constant; if (ctx.params->trim != JL_TRIM_NO) { - size_t min_valid = 1; - size_t max_valid = ~(size_t)0; - size_t latest_world = jl_get_world_counter(); // TODO: marshal the world age of the compilation here. - - // Find all methods matching the call signature - jl_array_t *matches = NULL; - jl_value_t *tup = NULL; - JL_GC_PUSH2(&tup, &matches); - if (!failed_dispatch) { - SmallVector argtypes; - for (auto& arg: argv) - argtypes.push_back(arg.typ); - tup = jl_apply_tuple_type_v(argtypes.data(), argtypes.size()); - matches = (jl_array_t*)jl_matching_methods((jl_tupletype_t*)tup, jl_nothing, 10 /*TODO: make global*/, 1, - latest_world, &min_valid, &max_valid, NULL); - if ((jl_value_t*)matches == jl_nothing) - failed_dispatch = 1; - } - - // Expand each matching method to its unique specialization, if it has exactly one - if (!failed_dispatch) { - size_t k; - size_t len = new_invokes.len; - for (k = 0; k < jl_array_nrows(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_method_instance_t *mi = jl_method_match_to_mi(match, latest_world, min_valid, max_valid, 0); - if (!mi) { - if (jl_array_nrows(matches) == 1) { - // if the method match is not compileable, but there is only one, fall back to - // unspecialized implementation - mi = jl_get_unspecialized(match->method); - } - else { - new_invokes.len = len; - failed_dispatch = 1; - break; - } - } - arraylist_push(&new_invokes, mi); - } - } - JL_GC_POP(); + abort(); // this code path is unsound, unsafe, and probably bad } if (failed_dispatch && trim_may_error(ctx.params->trim)) { @@ -6634,66 +6605,73 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met assert(jl_is_method_instance(mi)); ci = jl_atomic_load_relaxed(&mi->cache); } - - if (ci == NULL || (jl_value_t*)ci == jl_nothing) { - JL_GC_POP(); - return std::make_pair((Function*)NULL, (Function*)NULL); - } - auto inferred = jl_atomic_load_relaxed(&ci->inferred); - if (!inferred || inferred == jl_nothing) { + if (ci == NULL || (jl_value_t*)ci == jl_nothing || ci->rettype != rettype || !jl_egal(sigtype, mi->specTypes)) { // TODO: correctly handle the ABI conversion if rettype != ci->rettype JL_GC_POP(); return std::make_pair((Function*)NULL, (Function*)NULL); } - auto it = ctx.emission_context.compiled_functions.find(ci); - - if (it == ctx.emission_context.compiled_functions.end()) { - ++EmittedOpaqueClosureFunctions; - jl_code_info_t *ir = jl_uncompress_ir(closure_method, ci, (jl_value_t*)inferred); - JL_GC_PUSH1(&ir); - // TODO: Emit this inline and outline it late using LLVM's coroutine support. - orc::ThreadSafeModule closure_m = jl_create_ts_module( - name_from_method_instance(mi), ctx.emission_context.tsctx, - jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple())); - jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context); - JL_GC_POP(); - it = ctx.emission_context.compiled_functions.insert(std::make_pair(ci, std::make_pair(std::move(closure_m), std::move(closure_decls)))).first; + // method lookup code (similar to emit_invoke, and the inverse of emit_specsig_oc_call) + bool specsig = uses_specsig(sigtype, false, rettype, true); + std::string name; + std::string oc; + StringRef protoname; + StringRef proto_oc; + + // Check if we already queued this up + auto it = ctx.call_targets.find(ci); + bool need_to_emit = it == ctx.call_targets.end(); + if (!need_to_emit) { + assert(specsig == it->second.specsig); + if (specsig) { + protoname = it->second.decl->getName(); + proto_oc = it->second.oc->getName(); + } + else { + proto_oc = it->second.decl->getName(); + } + need_to_emit = false; + } + else { + if (specsig) { + raw_string_ostream(name) << "j_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + protoname = StringRef(name); + } + raw_string_ostream(oc) << "j1_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + proto_oc = StringRef(oc); } - auto &closure_m = it->second.first; - auto &closure_decls = it->second.second; - - assert(closure_decls.functionObject != "jl_fptr_sparam"); - bool isspecsig = closure_decls.functionObject != "jl_fptr_args"; - - Function *F = NULL; - std::string fname = isspecsig ? - closure_decls.functionObject : - closure_decls.specFunctionObject; - if (GlobalValue *V = jl_Module->getNamedValue(fname)) { + // Get the fptr1 OC + Function *F = nullptr; + if (GlobalValue *V = jl_Module->getNamedValue(proto_oc)) { F = cast(V); } else { F = Function::Create(get_func_sig(ctx.builder.getContext()), Function::ExternalLinkage, - fname, jl_Module); + proto_oc, jl_Module); jl_init_function(F, ctx.emission_context.TargetTriple); jl_name_jlfunc_args(ctx.emission_context, F); F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()})); } - Function *specF = NULL; - if (!isspecsig) { - specF = F; + + // Get the specsig (if applicable) + Function *specF = nullptr; + jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; + unsigned return_roots = 0; + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + assert(is_opaque_closure); + if (specsig) { + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, nullptr, protoname, mi->specTypes, rettype, is_opaque_closure, gcstack_arg); + cc = returninfo.cc; + return_roots = returninfo.return_roots; + specF = cast(returninfo.decl.getCallee()); } - else { - //emission context holds context lock so can get module - specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); - if (specF) { - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, - closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); - specF = cast(returninfo.decl.getCallee()); - } + + if (need_to_emit) { + ctx.call_targets[ci] = {cc, return_roots, specsig ? specF : F, specsig ? F : nullptr, specsig}; } + JL_GC_POP(); return std::make_pair(F, specF); } @@ -7173,7 +7151,12 @@ static Value *get_scope_field(jl_codectx_t &ctx) return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "current_scope"); } -static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) +Function *get_or_emit_fptr1(StringRef preal_decl, Module *M) +{ + return cast(M->getOrInsertFunction(preal_decl, get_func_sig(M->getContext()), get_func_attrs(M->getContext())).getCallee()); +} + +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT { ++EmittedToJLInvokes; jl_codectx_t ctx(M->getContext(), params, codeinst); @@ -7184,7 +7167,6 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr name, M); jl_init_function(f, params.TargetTriple); if (trim_may_error(params.params->trim)) { - arraylist_push(&new_invokes, codeinst->def); // Try t compile this invoke // TODO: Debuginfo! push_frames(ctx, ctx.linfo, codeinst->def, 1); } @@ -7213,7 +7195,17 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr return f; } -static void emit_cfunc_invalidate( +static jl_value_t *get_oc_type(jl_value_t *calltype, jl_value_t *rettype) JL_ALWAYS_LEAFTYPE +{ + jl_value_t *argtype = jl_argtype_without_function((jl_value_t*)calltype); + JL_GC_PUSH1(&argtype); + jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, argtype, rettype); + JL_GC_PROMISE_ROOTED(oc_type); + JL_GC_POP(); + return oc_type; +} + +void emit_specsig_to_fptr1( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, @@ -7240,14 +7232,18 @@ static void emit_cfunc_invalidate( ++AI; // gcstack_arg } for (size_t i = 0; i < nargs; i++) { - // n.b. calltype is required to be a datatype by construction for specsig - jl_value_t *jt = jl_nth_slot_type(calltype, i); if (i == 0 && is_for_opaque_closure) { + // `jt` would be wrong here (it is the captures type), so is not used used for + // the ABI decisions, but the argument actually will require boxing as its real type + // which can be exactly recomputed from the specialization, as that defined the ABI + jl_value_t *oc_type = get_oc_type(calltype, rettype); Value *arg_v = &*AI; ++AI; - myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); + myargs[i] = mark_julia_slot(arg_v, (jl_value_t*)oc_type, NULL, ctx.tbaa().tbaa_const); continue; } + // n.b. calltype is required to be a datatype by construction for specsig + jl_value_t *jt = jl_nth_slot_type(calltype, i); bool isboxed = false; Type *et; if (deserves_argbox(jt)) { @@ -7335,16 +7331,6 @@ static void emit_cfunc_invalidate( } } -static void emit_cfunc_invalidate( - Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, - jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, - size_t nargs, jl_codegen_params_t ¶ms, - size_t min_world, size_t max_world) -{ - emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, - prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func), min_world, max_world); -} - static Function* gen_cfun_wrapper( Module *into, jl_codegen_params_t ¶ms, const function_sig_t &sig, jl_value_t *ff, const char *aliasname, @@ -7712,11 +7698,11 @@ static Function* gen_cfun_wrapper( GlobalVariable::InternalLinkage, funcName, M); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.attrs, gf_thunk->getAttributes()})); - // build a specsig -> jl_apply_generic converter thunk + // build a specsig -> jl_apply_generic converter thunk // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer), // but which has the signature of a specsig - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context, - min_world, max_world); + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context, + prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func), min_world, max_world); returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), ctx.builder.CreateSelect(age_ok, returninfo.decl.getCallee(), gf_thunk)); } retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, codeinst->rettype, returninfo, nullptr, inputargs, nargs + 1); @@ -8026,7 +8012,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi } // generate a julia-callable function that calls f (AKA lam) -static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, StringRef funcName, +// if is_opaque_closure, then generate the OC invoke, rather than a real invoke +static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName, Module *M, jl_codegen_params_t ¶ms) { ++GeneratedInvokeWrappers; @@ -8055,11 +8042,14 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, j allocate_gc_frame(ctx, b0); SmallVector argv(nargs); - bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); for (size_t i = 0; i < nargs; ++i) { - jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : - jl_nth_slot_type(lam->specTypes, i); + if (i == 0 && is_opaque_closure) { + jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(lam->specTypes, jlretty) + argv[i] = mark_julia_slot(funcArg, oc_type, NULL, ctx.tbaa().tbaa_const); + continue; + } + jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i); Value *theArg; if (i == 0) { theArg = funcArg; @@ -8455,6 +8445,7 @@ static jl_llvm_functions_t // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise // OpaqueClosure implicitly loads the env if (i == 0 && ctx.is_opaque_closure) { + // n.b. this is not really needed, because ty was already supposed to be correct if (jl_is_array(src->slottypes)) { ty = jl_array_ptr_ref((jl_array_t*)src->slottypes, i); } @@ -8554,7 +8545,7 @@ static jl_llvm_functions_t raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); declarations.functionObject = wrapName; size_t nparams = jl_nparams(lam->specTypes); - gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, declarations.functionObject, M, ctx.emission_context); + gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, ctx.is_opaque_closure, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) // TODO: add attributes: dereferenceable // TODO: (if needsparams) add attributes: dereferenceable, readonly, nocapture @@ -8564,11 +8555,10 @@ static jl_llvm_functions_t GlobalVariable::ExternalLinkage, declarations.specFunctionObject, M); jl_init_function(f, ctx.emission_context.TargetTriple); - if (needsparams) { + if (needsparams) jl_name_jlfuncparams_args(ctx.emission_context, f); - } else { + else jl_name_jlfunc_args(ctx.emission_context, f); - } f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); returninfo.decl = f; declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args"; @@ -8940,76 +8930,73 @@ static jl_llvm_functions_t } for (i = 0; i < nreq && i < vinfoslen; i++) { jl_sym_t *s = slot_symbol(ctx, i); - jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); - // TODO: jl_nth_slot_type should call jl_rewrap_unionall? - // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise - bool isboxed = deserves_argbox(argType); - Type *llvmArgType = NULL; - if (i == 0 && ctx.is_opaque_closure) { - isboxed = false; - llvmArgType = ctx.builder.getPtrTy(AddressSpace::Derived); - argType = (jl_value_t*)jl_any_type; - } - else { - llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); - } jl_varinfo_t &vi = ctx.slots[i]; - if (s == jl_unused_sym || vi.value.constant) { - assert(vi.boxroot == NULL); - if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { - ++AI; - auto tracked = CountTrackedPointers(llvmArgType); - if (tracked.count && !tracked.all) - ++AI; - } - continue; - } jl_cgval_t theArg; - // If this is an opaque closure, implicitly load the env and switch - // the world age. if (i == 0 && ctx.is_opaque_closure) { + // If this is an opaque closure, implicitly load the env and switch + // the world age. The specTypes value is wrong for this field, so + // this needs to be handled first. + // jl_value_t *oc_type = get_oc_type(calltype, rettype); + Value *oc_this = decay_derived(ctx, &*AI); + ++AI; // both specsig (derived) and fptr1 (box) pass this argument as a distinct argument // Load closure world - Value *oc_this = decay_derived(ctx, &*AI++); - Value *argaddr = oc_this; - Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world)); - + Value *worldaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, world)); jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr); - // Load closure env - Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures)); + if (s == jl_unused_sym || vi.value.constant) + continue; - jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, - nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); - theArg = update_julia_type(ctx, closure_env, vi.value.typ); - } - else if (specsig) { - theArg = get_specsig_arg(argType, llvmArgType, isboxed); + // Load closure env, which is always a boxed value (usually some Tuple) currently + Value *envaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, captures)); + theArg = typed_load(ctx, envaddr, NULL, (jl_value_t*)vi.value.typ, + nullptr, nullptr, /*isboxed*/true, AtomicOrdering::NotAtomic, false, sizeof(void*)); } else { - if (i == 0) { - // first (function) arg is separate in jlcall - theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); + jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + // TODO: jl_nth_slot_type should call jl_rewrap_unionall? + // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise + bool isboxed = deserves_argbox(argType); + Type *llvmArgType = NULL; + llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); + if (s == jl_unused_sym || vi.value.constant) { + assert(vi.boxroot == NULL); + if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { + ++AI; + auto tracked = CountTrackedPointers(llvmArgType); + if (tracked.count && !tracked.all) + ++AI; + } + continue; + } + if (specsig) { + theArg = get_specsig_arg(argType, llvmArgType, isboxed); } else { - Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( - ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), - false, vi.value.typ)); - theArg = mark_julia_type(ctx, load, true, vi.value.typ); - if (debug_enabled && vi.dinfo && !vi.boxroot) { - SmallVector addr; - addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus_uconst); - addr.push_back((i - 1) * sizeof(void*)); - if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) + if (i == 0) { + // first (function) arg is separate in jlcall + theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); + } + else { + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( + ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), + false, vi.value.typ)); + theArg = mark_julia_type(ctx, load, true, vi.value.typ); + if (debug_enabled && vi.dinfo && !vi.boxroot) { + SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); - dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr), - topdebugloc, - ctx.builder.GetInsertBlock()); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); + addr.push_back((i - 1) * sizeof(void*)); + if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) + addr.push_back(llvm::dwarf::DW_OP_deref); + dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr), + topdebugloc, + ctx.builder.GetInsertBlock()); + } } } } @@ -9996,7 +9983,6 @@ jl_llvm_functions_t jl_emit_code( { JL_TIMING(CODEGEN, CODEGEN_LLVM); jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); - // caller must hold codegen_lock jl_llvm_functions_t decls = {}; assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || compare_cgparams(params.params, &jl_default_cgparams)) && @@ -10031,33 +10017,38 @@ jl_llvm_functions_t jl_emit_code( return decls; } +static int effects_foldable(uint32_t effects) +{ + // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) + return ((effects & 0x7) == 0) && // is_consistent(effects) + (((effects >> 10) & 0x03) == 0) && // is_noub(effects) + (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) + ((effects >> 6) & 0x01); // is_terminates(effects) +} + static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t ¶ms, jl_method_instance_t *mi, jl_value_t *rettype) { - Module *M = m.getModuleUnlocked(); - jl_codectx_t ctx(M->getContext(), params, 0, 0); - ctx.name = M->getModuleIdentifier().data(); - std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_llvm_functions_t declarations; declarations.functionObject = "jl_f_opaque_closure_call"; if (uses_specsig(mi->specTypes, false, rettype, true)) { + // context lock is held by params + Module *M = m.getModuleUnlocked(); + jl_codectx_t ctx(M->getContext(), params, 0, 0); + ctx.name = M->getModuleIdentifier().data(); + std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); Function *gf_thunk = cast(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context, ctx.min_world, ctx.max_world); + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, + mi->specTypes, rettype, true, nrealargs, ctx.emission_context, + prepare_call_in(gf_thunk->getParent(), jlopaque_closure_call_func), // TODO: this could call emit_oc_call directly + ctx.min_world, ctx.max_world); declarations.specFunctionObject = funcName; } return declarations; } -static int effects_foldable(uint32_t effects) -{ - // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) - return ((effects & 0x7) == 0) && // is_consistent(effects) - (((effects >> 10) & 0x03) == 0) && // is_noub(effects) - (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) - ((effects >> 6) & 0x01); // is_terminates(effects) -} jl_llvm_functions_t jl_emit_codeinst( orc::ThreadSafeModule &m, @@ -10070,12 +10061,14 @@ jl_llvm_functions_t jl_emit_codeinst( JL_GC_PUSH1(&src); if (!src) { src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - jl_method_t *def = codeinst->def->def.method; + jl_method_instance_t *mi = codeinst->def; + jl_method_t *def = mi->def.method; // Check if this is the generic method for opaque closure wrappers - - // if so, generate the specsig -> invoke converter. + // if so, this must compile specptr such that it holds the specptr -> invoke wrapper + // to satisfy the dispatching implementation requirements of jl_f_opaque_closure_call if (def == jl_opaque_closure_method) { JL_GC_POP(); - return jl_emit_oc_wrapper(m, params, codeinst->def, codeinst->rettype); + return jl_emit_oc_wrapper(m, params, mi, codeinst->rettype); } if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def)) src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src); @@ -10149,135 +10142,15 @@ jl_llvm_functions_t jl_emit_codeinst( return decls; } - -void jl_compile_workqueue( - jl_codegen_params_t ¶ms, - CompilationPolicy policy) -{ - JL_TIMING(CODEGEN, CODEGEN_Workqueue); - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - while (!params.workqueue.empty()) { - jl_code_instance_t *codeinst; - auto it = params.workqueue.back(); - codeinst = it.first; - auto proto = it.second; - params.workqueue.pop_back(); - // try to emit code for this item from the workqueue - StringRef preal_decl = ""; - bool preal_specsig = false; - jl_callptr_t invoke = NULL; - if (params.cache) { - // WARNING: this correctness is protected by an outer lock - uint8_t specsigflags; - void *fptr; - jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); - //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) - if (invoke == jl_fptr_args_addr) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - } - else if (specsigflags & 0b1) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - preal_specsig = true; - } - } - if (preal_decl.empty()) { - auto it = params.compiled_functions.find(codeinst); - if (it == params.compiled_functions.end()) { - // Reinfer the function. The JIT came along and removed the inferred - // method body. See #34993 - if ((policy != CompilationPolicy::Default || params.params->trim) && - jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { - // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary) - codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE); - } - if (codeinst) { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), - params.tsctx, params.DL, params.TargetTriple); - auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params); - if (result_m) - it = params.compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; - } - } - if (it != params.compiled_functions.end()) { - auto &decls = it->second.second; - if (decls.functionObject == "jl_fptr_args") { - preal_decl = decls.specFunctionObject; - } - else if (decls.functionObject != "jl_fptr_sparam") { - preal_decl = decls.specFunctionObject; - preal_specsig = true; - } - } - } - // patch up the prototype we emitted earlier - Module *mod = proto.decl->getParent(); - assert(proto.decl->isDeclaration()); - if (proto.specsig) { - // expected specsig - if (!preal_specsig) { - if (params.params->trim) { - auto it = params.compiled_functions.find(codeinst); //TODO: What to do about this - errs() << "Bailed out to invoke when compiling:"; - jl_(codeinst->def); - if (it != params.compiled_functions.end()) { - errs() << it->second.second.functionObject << "\n"; - errs() << it->second.second.specFunctionObject << "\n"; - } else - errs() << "codeinst not in compile_functions\n"; - } - // emit specsig-to-(jl)invoke conversion - StringRef invokeName; - if (invoke != NULL) - invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - Function *preal = emit_tojlinvoke(codeinst, invokeName, mod, params); - proto.decl->setLinkage(GlobalVariable::InternalLinkage); - //protodecl->setAlwaysInline(); - jl_init_function(proto.decl, params.TargetTriple); - size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed - // TODO: maybe this can be cached in codeinst->specfptr? - emit_cfunc_invalidate(proto.decl, proto.cc, proto.return_roots, codeinst->def->specTypes, codeinst->rettype, false, nrealargs, params, preal, 0, 0); - preal_decl = ""; // no need to fixup the name - } - else { - assert(!preal_decl.empty()); - } - } - else { - // expected non-specsig - if (preal_decl.empty() || preal_specsig) { - // emit jlcall1-to-(jl)invoke conversion - StringRef invokeName; - if (invoke != NULL) - invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - preal_decl = emit_tojlinvoke(codeinst, invokeName, mod, params)->getName(); - } - } - if (!preal_decl.empty()) { - // merge and/or rename this prototype to the real function - if (Value *specfun = mod->getNamedValue(preal_decl)) { - if (proto.decl != specfun) - proto.decl->replaceAllUsesWith(specfun); - } - else { - proto.decl->setName(preal_decl); - } - } - } - JL_GC_POP(); -} - - // --- initialization --- -SmallVector, 0> gv_for_global; +static auto gv_for_global = new SmallVector, 0>(); static void global_jlvalue_to_llvm(JuliaVariable *var, jl_value_t **addr) { - gv_for_global.push_back(std::make_pair(addr, var)); + gv_for_global->push_back(std::make_pair(addr, var)); } static JuliaVariable *julia_const_gv(jl_value_t *val) { - for (auto &kv : gv_for_global) { + for (auto &kv : *gv_for_global) { if (*kv.first == val) return kv.second; } @@ -10286,6 +10159,9 @@ static JuliaVariable *julia_const_gv(jl_value_t *val) static void init_jit_functions(void) { + add_named_global("jl_fptr_args", jl_fptr_args_addr); + add_named_global("jl_fptr_sparam", jl_fptr_sparam_addr); + add_named_global("jl_f_opaque_closure_call", &jl_f_opaque_closure_call); add_named_global(jl_small_typeof_var, &jl_small_typeof); add_named_global(jlstack_chk_guard_var, &__stack_chk_guard); add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle); diff --git a/src/debug-registry.h b/src/debug-registry.h index 4c9e13d8cd72d..4d0b7a44f19e5 100644 --- a/src/debug-registry.h +++ b/src/debug-registry.h @@ -32,7 +32,7 @@ class JITDebugInfoRegistry std::unique_lock lock; CResourceT &resource; - Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT : lock(mutex), resource(resource) {} + Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lock(mutex), resource(resource) {} Lock(Lock &&) JL_NOTSAFEPOINT = default; Lock &operator=(Lock &&) JL_NOTSAFEPOINT = default; @@ -56,7 +56,7 @@ class JITDebugInfoRegistry return resource; } - ~Lock() JL_NOTSAFEPOINT = default; + ~Lock() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; private: @@ -68,15 +68,15 @@ class JITDebugInfoRegistry Locked(ResourceT resource = ResourceT()) JL_NOTSAFEPOINT : mutex(), resource(std::move(resource)) {} - LockT operator*() JL_NOTSAFEPOINT { + LockT operator*() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return LockT(mutex, resource); } - ConstLockT operator*() const JL_NOTSAFEPOINT { + ConstLockT operator*() const JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return ConstLockT(mutex, resource); } - ~Locked() JL_NOTSAFEPOINT = default; + ~Locked() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; struct image_info_t { @@ -105,6 +105,7 @@ class JITDebugInfoRegistry std::unique_ptr object; std::unique_ptr context; LazyObjectInfo() = delete; + ~LazyObjectInfo() JL_NOTSAFEPOINT = default; }; struct SectionInfo { @@ -113,6 +114,7 @@ class JITDebugInfoRegistry ptrdiff_t slide; uint64_t SectionIndex; SectionInfo() = delete; + ~SectionInfo() JL_NOTSAFEPOINT = default; }; template @@ -145,7 +147,7 @@ class JITDebugInfoRegistry void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT; jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT; void registerJITObject(const llvm::object::ObjectFile &Object, - std::function getLoadAddress); + std::function getLoadAddress) JL_NOTSAFEPOINT; objectmap_t& getObjectMap() JL_NOTSAFEPOINT; void add_image_info(image_info_t info) JL_NOTSAFEPOINT; bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index f6fca47e9a889..31f1ba8281a89 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -58,7 +58,7 @@ extern "C" void __register_frame(void*) JL_NOTSAFEPOINT; extern "C" void __deregister_frame(void*) JL_NOTSAFEPOINT; template -static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) +static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) JL_NOTSAFEPOINT { const char *P = EHFrameAddr; const char *End = P + EHFrameSize; @@ -164,6 +164,12 @@ static void jl_profile_atomic(T f) JL_NOTSAFEPOINT // --- storing and accessing source location metadata --- void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) { + // Non-opaque-closure MethodInstances are considered globally rooted + // through their methods, but for OC, we need to create a global root + // here. + jl_method_instance_t *mi = codeinst->def; + if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) + jl_as_global_root((jl_value_t*)mi, 1); getJITDebugRegistry().add_code_in_flight(name, codeinst, DL); } @@ -369,11 +375,6 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, if (codeinst) { JL_GC_PROMISE_ROOTED(codeinst); mi = codeinst->def; - // Non-opaque-closure MethodInstances are considered globally rooted - // through their methods, but for OC, we need to create a global root - // here. - if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) - mi = (jl_method_instance_t*)jl_as_global_root((jl_value_t*)mi, 1); } jl_profile_atomic([&]() JL_NOTSAFEPOINT { if (mi) @@ -1281,14 +1282,14 @@ void register_eh_frames(uint8_t *Addr, size_t Size) { // On OS X OS X __register_frame takes a single FDE as an argument. // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_register_frame(Entry); }); } void deregister_eh_frames(uint8_t *Addr, size_t Size) { - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_deregister_frame(Entry); }); } @@ -1300,7 +1301,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) // Skip over an arbitrary long LEB128 encoding. // Return the pointer to the first unprocessed byte. -static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) +static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { const uint8_t *P = Addr; while ((*P >> 7) != 0 && P < End) @@ -1312,7 +1313,7 @@ static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) // bytes than what there are more bytes than what the type can store. // Adjust the pointer to the first unprocessed byte. template static T parse_leb128(const uint8_t *&Addr, - const uint8_t *End) + const uint8_t *End) JL_NOTSAFEPOINT { typedef typename std::make_unsigned::type uT; uT v = 0; @@ -1335,7 +1336,7 @@ template static T parse_leb128(const uint8_t *&Addr, } template -static U safe_trunc(T t) +static U safe_trunc(T t) JL_NOTSAFEPOINT { assert((t >= static_cast(std::numeric_limits::min())) && (t <= static_cast(std::numeric_limits::max()))); @@ -1375,7 +1376,7 @@ enum DW_EH_PE : uint8_t { }; // Parse the CIE and return the type of encoding used by FDE -static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) +static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { // https://www.airs.com/blog/archives/460 // Length (4 bytes) @@ -1481,7 +1482,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // Now first count the number of FDEs size_t nentries = 0; - processFDEs((char*)Addr, Size, [&](const char*){ nentries++; }); + processFDEs((char*)Addr, Size, [&](const char*) JL_NOTSAFEPOINT { nentries++; }); if (nentries == 0) return; @@ -1510,7 +1511,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // CIE's (may not happen) without parsing it every time. const uint8_t *cur_cie = nullptr; DW_EH_PE encoding = DW_EH_PE_omit; - processFDEs((char*)Addr, Size, [&](const char *Entry) { + processFDEs((char*)Addr, Size, [&](const char *Entry) JL_NOTSAFEPOINT { // Skip Length (4bytes) and CIE offset (4bytes) uint32_t fde_size = *(const uint32_t*)Entry; uint32_t cie_id = ((const uint32_t*)Entry)[1]; @@ -1631,7 +1632,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) #endif extern "C" JL_DLLEXPORT_CODEGEN -uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) +uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) JL_NOTSAFEPOINT { // Might be called from unmanaged thread jl_lock_profile(); diff --git a/src/engine.cpp b/src/engine.cpp index 6db4dce44e48e..2b68de731c4dd 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -45,8 +45,8 @@ template<> struct llvm::DenseMapInfo { } }; -static std::mutex engine_lock; -static std::condition_variable engine_wait; +static std::mutex engine_lock; // n.b. this lock is only ever held briefly +static std::condition_variable engine_wait; // but it may be waiting a while in this state // map from MethodInstance to threadid that owns it currently for inference static DenseMap Reservations; // vector of which threads are blocked and which lease they need @@ -63,55 +63,51 @@ jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph jl_code_instance_t *ci = jl_new_codeinst_uninit(m, owner); // allocate a placeholder JL_GC_PUSH1(&ci); - int8_t gc_state = jl_gc_safe_enter(ct->ptls); - InferKey key = {m, owner}; - std::unique_lock lock(engine_lock); auto tid = jl_atomic_load_relaxed(&ct->tid); - if ((signed)Awaiting.size() < tid + 1) - Awaiting.resize(tid + 1); - while (1) { - auto record = Reservations.find(key); - if (record == Reservations.end()) { - Reservations[key] = ReservationInfo{tid, ci}; - lock.unlock(); - jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint - JL_GC_POP(); - return ci; - } - // before waiting, need to run deadlock/cycle detection - // there is a cycle if the thread holding our lease is blocked - // and waiting for (transitively) any lease that is held by this thread - auto wait_tid = record->second.tid; - while (1) { - if (wait_tid == tid) { - lock.unlock(); - jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint - JL_GC_POP(); - ct->ptls->engine_nqueued--; - return ci; // break the cycle + if (([tid, m, owner, ci] () -> bool { // necessary scope block / lambda for unique_lock + jl_unique_gcsafe_lock lock(engine_lock); + InferKey key{m, owner}; + if ((signed)Awaiting.size() < tid + 1) + Awaiting.resize(tid + 1); + while (1) { + auto record = Reservations.find(key); + if (record == Reservations.end()) { + Reservations[key] = ReservationInfo{tid, ci}; + return false; + } + // before waiting, need to run deadlock/cycle detection + // there is a cycle if the thread holding our lease is blocked + // and waiting for (transitively) any lease that is held by this thread + auto wait_tid = record->second.tid; + while (1) { + if (wait_tid == tid) + return true; + if ((signed)Awaiting.size() <= wait_tid) + break; // no cycle, since it is running (and this should be unreachable) + auto key2 = Awaiting[wait_tid]; + if (key2.mi == nullptr) + break; // no cycle, since it is running + auto record2 = Reservations.find(key2); + if (record2 == Reservations.end()) + break; // no cycle, since it is about to resume + assert(wait_tid != record2->second.tid); + wait_tid = record2->second.tid; + } + Awaiting[tid] = key; + lock.wait(engine_wait); + Awaiting[tid] = InferKey{}; } - if ((signed)Awaiting.size() <= wait_tid) - break; // no cycle, since it is running (and this should be unreachable) - auto key2 = Awaiting[wait_tid]; - if (key2.mi == nullptr) - break; // no cycle, since it is running - auto record2 = Reservations.find(key2); - if (record2 == Reservations.end()) - break; // no cycle, since it is about to resume - assert(wait_tid != record2->second.tid); - wait_tid = record2->second.tid; - } - Awaiting[tid] = key; - engine_wait.wait(lock); - Awaiting[tid] = InferKey{}; - } + })()) + ct->ptls->engine_nqueued--; + JL_GC_POP(); + return ci; } int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner) { jl_task_t *ct = jl_current_task; InferKey key = {m, owner}; - std::unique_lock lock(engine_lock); + std::unique_lock lock(engine_lock); auto record = Reservations.find(key); return record != Reservations.end() && record->second.tid == jl_atomic_load_relaxed(&ct->tid); } @@ -123,7 +119,7 @@ STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) { - std::unique_lock lock(engine_lock); + std::unique_lock lock(engine_lock); bool any = false; for (auto I = Reservations.begin(); I != Reservations.end(); ++I) { jl_code_instance_t *ci = I->second.ci; @@ -142,7 +138,7 @@ void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src) { jl_task_t *ct = jl_current_task; - std::unique_lock lock(engine_lock); + std::unique_lock lock(engine_lock); auto record = Reservations.find(InferKey{ci->def, ci->owner}); if (record == Reservations.end() || record->second.ci != ci) return; @@ -152,7 +148,6 @@ void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src) engine_wait.notify_all(); } - #ifdef __cplusplus } #endif diff --git a/src/gf.c b/src/gf.c index fc2e62ebff96b..e77c950c38ae4 100644 --- a/src/gf.c +++ b/src/gf.c @@ -3222,6 +3222,23 @@ jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_v return tt; } +// undo jl_argtype_with_function transform +jl_value_t *jl_argtype_without_function(jl_value_t *ftypes) +{ + jl_value_t *types = jl_unwrap_unionall(ftypes); + size_t l = jl_nparams(types); + if (l == 1 && jl_is_vararg(jl_tparam0(types))) + return ftypes; + jl_value_t *tt = (jl_value_t*)jl_alloc_svec(l - 1); + JL_GC_PUSH1(&tt); + for (size_t i = 1; i < l; i++) + jl_svecset(tt, i - 1, jl_tparam(types, i)); + tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 0); + tt = jl_rewrap_unionall_(tt, types); + JL_GC_POP(); + return tt; +} + #ifdef JL_TRACE static int trace_en = 0; static int error_en = 1; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 313449dda5557..8b8004af03616 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -64,9 +64,6 @@ using namespace llvm; #define DEBUG_TYPE "julia_jitlayers" STATISTIC(LinkedGlobals, "Number of globals linked"); -STATISTIC(CompiledCodeinsts, "Number of codeinsts compiled directly"); -STATISTIC(MaxWorkqueueSize, "Maximum number of elements in the workqueue"); -STATISTIC(IndirectCodeinsts, "Number of dependent codeinsts compiled"); STATISTIC(SpecFPtrCount, "Number of specialized function pointers compiled"); STATISTIC(UnspecFPtrCount, "Number of specialized function pointers compiled"); STATISTIC(ModulesAdded, "Number of modules added to the JIT"); @@ -151,13 +148,6 @@ void jl_dump_llvm_opt_impl(void *s) **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s; } -#ifndef JL_USE_JITLINK -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - SmallVectorImpl &Stack) JL_NOTSAFEPOINT; -#endif static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT; void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT @@ -187,214 +177,536 @@ void jl_jit_globals(std::map &globals) JL_NOTSAFEPOINT } } -// this generates llvm code for the lambda info -// and adds the result to the jitlayers -// (and the shadow module), -// and generates code for it -static jl_callptr_t _jl_compile_codeinst( - jl_code_instance_t *codeinst, - jl_code_info_t *src, - orc::ThreadSafeContext context) + // lock for places where only single threaded behavior is implemented, so we need GC support +static jl_mutex_t jitlock; + // locks for adding external code to the JIT atomically +static std::mutex extern_c_lock; + // locks and barriers for this state +static std::mutex engine_lock; +static std::condition_variable engine_wait; +static int threads_in_compiler_phase; + // the TSM for each codeinst +static SmallVector sharedmodules; +static DenseMap emittedmodules; + // the invoke and specsig function names in the JIT +static DenseMap invokenames; + // everything that any thread wants to compile right now +static DenseSet compileready; + // everything that any thread has compiled recently +static DenseSet linkready; + // a map from a codeinst to the outgoing edges needed before linking it +static DenseMap> complete_graph; + // the state for each codeinst and the number of unresolved edges (we don't + // really need this once JITLink is available everywhere, since every module + // is automatically complete, and we can emit any required fixups later as a + // separate module) +static DenseMap> incompletemodules; + // the set of incoming unresolved edges resolved by a codeinstance +static DenseMap> incomplete_rgraph; + +// Lock hierarchy here: +// jitlock is outermost, can contain others and allows GC +// engine_lock is next +// ThreadSafeContext locks are next, they should not be nested (unless engine_lock is also held, but this may make TSAN sad anyways) +// extern_c_lock is next +// jl_ExecutionEngine internal locks are exclusive to this list, since OrcJIT promises to never hold a lock over a materialization unit: +// construct a query object from a query set and query handler +// lock the session +// lodge query against requested symbols, collect required materializers (if any) +// unlock the session +// dispatch materializers (if any) +// However, this guarantee relies on Julia releasing all TSC locks before causing any materialization units to be dispatched +// as materialization may need to acquire TSC locks. + + +static void finish_params(Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT { - // caller must hold codegen_lock - // and have disabled finalizers - uint64_t start_time = 0; - bool timed = !!*jl_ExecutionEngine->get_dump_compiles_stream(); - if (timed) - start_time = jl_hrtime(); + if (params._shared_module) { + sharedmodules.push_back(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + } + + // In imaging mode, we can't inline global variable initializers in order to preserve + // the fiction that we don't know what loads from the global will return. Thus, we + // need to emit a separate module for the globals before any functions are compiled, + // to ensure that the globals are defined when they are compiled. + if (params.imaging_mode) { + if (!params.global_targets.empty()) { + void **globalslots = new void*[params.global_targets.size()]; + void **slot = globalslots; + for (auto &global : params.global_targets) { + auto GV = global.second; + *slot = global.first; + jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); + slot++; + } +#ifdef __clang_analyzer__ + static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it +#endif + } + } + else { + StringMap NewGlobals; + for (auto &global : params.global_targets) { + NewGlobals[global.second->getName()] = global.first; + } + for (auto &GV : M->globals()) { + auto InitValue = NewGlobals.find(GV.getName()); + if (InitValue != NewGlobals.end()) { + jl_link_global(&GV, InitValue->second); + } + } + } +} - assert(jl_is_code_instance(codeinst)); - JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); - jl_callptr_t fptr = NULL; - // emit the code in LLVM IR form - jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context - params.cache = true; - params.imaging_mode = imaging_default(); - params.debug_level = jl_options.debug_level; - { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.DL, params.TargetTriple); - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); - if (result_m) - params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; - jl_compile_workqueue(params, CompilationPolicy::Default); - - if (params._shared_module) { - jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); - jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); +static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t ¶ms, bool forceall=false) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + jl_task_t *ct = jl_current_task; + decltype(params.workqueue) edges; + std::swap(params.workqueue, edges); + for (auto &it : edges) { + jl_code_instance_t *codeinst = it.first; + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + jl_callptr_t invoke = nullptr; + bool isedge = false; + assert(params.cache); + // Checking the cache here is merely an optimization and not strictly required + // But it must be consistent with the following invokenames lookup, which is protected by the engine_lock + uint8_t specsigflags; + void *fptr; + jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); + //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) + if (invoke == jl_fptr_args_addr) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); } - - // In imaging mode, we can't inline global variable initializers in order to preserve - // the fiction that we don't know what loads from the global will return. Thus, we - // need to emit a separate module for the globals before any functions are compiled, - // to ensure that the globals are defined when they are compiled. - if (params.imaging_mode) { - // Won't contain any PLT/dlsym calls, so no need to optimize those - if (!params.global_targets.empty()) { - void **globalslots = new void*[params.global_targets.size()]; - void **slot = globalslots; - for (auto &global : params.global_targets) { - auto GV = global.second; - *slot = global.first; - jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); - slot++; + else if (specsigflags & 0b1) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); + preal_specsig = true; + } + bool force = forceall || invoke != nullptr; + if (preal_decl.empty()) { + auto it = invokenames.find(codeinst); + if (it != invokenames.end()) { + auto &decls = it->second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + isedge = true; } -#ifdef __clang_analyzer__ - static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it -#endif + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + isedge = true; + } + force = true; } } - else { - StringMap NewGlobals; - for (auto &global : params.global_targets) { - NewGlobals[global.second->getName()] = global.first; + if (!preal_decl.empty() || force) { + // if we have a prototype emitted, compare it to what we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invoke != nullptr && invokeName.empty()) { + assert(invoke != jl_fptr_args_addr); + if (invoke == jl_fptr_sparam_addr) + invokeName = "jl_fptr_sparam"; + else if (invoke == jl_f_opaque_closure_call_addr) + invokeName = "jl_f_opaque_closure_call"; + else + invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + isedge = false; } - for (auto &def : params.compiled_functions) { - auto M = std::get<0>(def.second).getModuleUnlocked(); - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); - } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + // TODO: maybe this can be cached in codeinst->specfptr? + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls) + jl_method_instance_t *mi = codeinst->def; + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0); + jl_gc_unsafe_leave(ct->ptls, gc_state); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); } } - } - -#ifndef JL_USE_JITLINK - // Collect the exported functions from the params.compiled_functions modules, - // which form dependencies on which functions need to be - // compiled first. Cycles of functions are compiled together. - // (essentially we compile a DAG of SCCs in reverse topological order, - // if we treat declarations of external functions as edges from declaration - // to definition) - StringMap NewExports; - for (auto &def : params.compiled_functions) { - orc::ThreadSafeModule &TSM = std::get<0>(def.second); - //The underlying context object is still locked because params is not destroyed yet - auto M = TSM.getModuleUnlocked(); - jl_ExecutionEngine->optimizeDLSyms(*M); - for (auto &F : M->global_objects()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports[F.getName()] = &TSM; + if (proto.oc) { // additionally, if we are dealing with an OC constructor, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + if (invoke != nullptr && ocinvokeDecl.empty()) { + // check for some special tokens used by opaque_closure.c and convert those to their real functions + assert(invoke != jl_fptr_args_addr); + assert(invoke != jl_fptr_sparam_addr); + if (invoke == jl_fptr_interpret_call_addr) + ocinvokeDecl = "jl_fptr_interpret_call"; + else if (invoke == jl_fptr_const_return_addr) + ocinvokeDecl = "jl_fptr_const_return"; + else if (invoke == jl_f_opaque_closure_call_addr) + ocinvokeDecl = "jl_f_opaque_closure_call"; + //else if (invoke == jl_interpret_opaque_closure_addr) + else + ocinvokeDecl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") { + if (pinvoke == nullptr) + ocinvokeDecl = get_or_emit_fptr1(preal_decl, mod)->getName(); + else + ocinvokeDecl = pinvoke->getName(); + } + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); } } } - DenseMap Queued; - SmallVector Stack; - for (auto &def : params.compiled_functions) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports, Queued, Stack); - assert(Queued.empty() && Stack.empty() && !M); + else { + isedge = true; + params.workqueue.push_back(it); + incomplete_rgraph[codeinst].push_back(callee); } -#else - for (auto &def : params.compiled_functions) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - if (M) - jl_ExecutionEngine->addModule(std::move(M)); + if (isedge) + complete_graph[callee].push_back(codeinst); + } + return params.workqueue.size(); +} + +// test whether codeinst->invoke is usable already without further compilation needed +static bool jl_is_compiled_codeinst(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT +{ + auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (invoke == nullptr || invoke == jl_fptr_wait_for_compiled_addr) + return false; + return true; +} + +// move codeinst (and deps) from incompletemodules to emitted modules +// and populate compileready from complete_graph +static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + SmallVector workqueue; + workqueue.push_back(codeinst); + while (!workqueue.empty()) { + codeinst = workqueue.pop_back_val(); + if (!invokenames.count(codeinst)) { + // this means it should be compiled already while the callee was in stasis + assert(jl_is_compiled_codeinst(codeinst)); + continue; } -#endif - ++CompiledCodeinsts; - MaxWorkqueueSize.updateMax(params.compiled_functions.size()); - IndirectCodeinsts += params.compiled_functions.size() - 1; - } - - // batch compile job for all new functions - SmallVector NewDefs; - for (auto &def : params.compiled_functions) { - jl_llvm_functions_t &decls = std::get<1>(def.second); - if (decls.functionObject != "jl_fptr_args" && - decls.functionObject != "jl_fptr_sparam" && - decls.functionObject != "jl_f_opaque_closure_call") - NewDefs.push_back(decls.functionObject); - if (!decls.specFunctionObject.empty()) - NewDefs.push_back(decls.specFunctionObject); - } - auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); - - size_t i = 0; - size_t nextaddr = 0; - for (auto &def : params.compiled_functions) { - jl_code_instance_t *this_code = def.first; - if (i < jl_timing_print_limit) - jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK); - - jl_llvm_functions_t &decls = std::get<1>(def.second); - jl_callptr_t addr; - bool isspecsig = false; - if (decls.functionObject == "jl_fptr_args") { - addr = jl_fptr_args_addr; + // if this was incomplete, force completion now of it + auto it = incompletemodules.find(codeinst); + if (it != incompletemodules.end()) { + int waiting = 0; + auto &edges = complete_graph[codeinst]; + auto edges_end = std::remove_if(edges.begin(), edges.end(), [&waiting, codeinst] (jl_code_instance_t *edge) JL_NOTSAFEPOINT -> bool { + auto &redges = incomplete_rgraph[edge]; + // waiting += std::erase(redges, codeinst); + auto redges_end = std::remove(redges.begin(), redges.end(), codeinst); + if (redges_end != redges.end()) { + waiting += redges.end() - redges_end; + redges.erase(redges_end, redges.end()); + assert(!invokenames.count(edge)); + } + return !invokenames.count(edge); + }); + edges.erase(edges_end, edges.end()); + assert(waiting == std::get<1>(it->second)); + std::get<1>(it->second) = 0; + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[codeinst].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); + } + // and then indicate this should be compiled now + if (!linkready.count(codeinst) && compileready.insert(codeinst).second) { + auto edges = complete_graph.find(codeinst); + if (edges != complete_graph.end()) { + workqueue.append(edges->second); + } + } + } +} + +// notify any other pending work that this edge now has code defined +static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + auto notify = incomplete_rgraph.find(edge); + if (notify == incomplete_rgraph.end()) + return; + auto redges = std::move(notify->second); + incomplete_rgraph.erase(notify); + for (size_t i = 0; i < redges.size(); i++) { + jl_code_instance_t *callee = redges[i]; + auto it = incompletemodules.find(callee); + assert(it != incompletemodules.end()); + if (--std::get<1>(it->second) == 0) { + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + assert(callee == it->first); + int waiting = jl_analyze_workqueue(callee, params); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[callee].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); } - else if (decls.functionObject == "jl_fptr_sparam") { - addr = jl_fptr_sparam_addr; + } +} + + +// set the invoke field for codeinst (and all deps, and assist with other pending work from other threads) now +static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) +{ + jl_unique_gcsafe_lock lock(engine_lock); + if (!invokenames.count(codeinst)) + return; + threads_in_compiler_phase++; + prepare_compile(codeinst); // may safepoint + while (1) { + // TODO: split up this work by ThreadSafeContext, so two threads don't need to get the same locks and stall + if (!sharedmodules.empty()) { + auto TSM = sharedmodules.pop_back_val(); + lock.native.unlock(); + { + auto Lock = TSM.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); + lock.native.lock(); } - else if (decls.functionObject == "jl_f_opaque_closure_call") { - addr = jl_f_opaque_closure_call_addr; + else if (!compileready.empty()) { + // move a function from compileready to linkready then compile it + auto compilenext = compileready.begin(); + codeinst = *compilenext; + compileready.erase(compilenext); + auto TSMref = emittedmodules.find(codeinst); + assert(TSMref != emittedmodules.end()); + auto TSM = std::move(TSMref->second); + linkready.insert(codeinst); + emittedmodules.erase(TSMref); + lock.native.unlock(); + uint64_t start_time = jl_hrtime(); + { + auto Lock = TSM.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint + // If logging of the compilation stream is enabled, + // then dump the method-instance specialization type to the stream + jl_method_instance_t *mi = codeinst->def; + if (jl_is_method(mi->def.method)) { + auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); + if (stream) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); + jl_static_show((JL_STREAM*)stream, mi->specTypes); + ios_printf(stream, "\"\n"); + } + } + lock.native.lock(); } else { - assert(NewDefs[nextaddr] == decls.functionObject); - addr = (jl_callptr_t)Addrs[nextaddr++]; - assert(addr); - isspecsig = true; + break; } - if (!decls.specFunctionObject.empty()) { - void *prev_specptr = NULL; - assert(NewDefs[nextaddr] == decls.specFunctionObject); - void *spec = (void*)Addrs[nextaddr++]; - assert(spec); - if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { - // only set specsig and invoke if we were the first to set specptr - jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); - // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr - // either assumes that specptr was null, doesn't care about specptr, - // or will wait until specsigflags has 0b10 set before reloading invoke - jl_atomic_store_release(&this_code->invoke, addr); - jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); - } else { - //someone else beat us, don't commit any results - while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { - jl_cpu_pause(); + } + codeinst = nullptr; + // barrier until all threads have finished calling addModule + if (--threads_in_compiler_phase == 0) { + // the last thread out will finish linking everything + // then release all of the other threads + // move the function pointers out from invokenames to the codeinst + + // batch compile job for all new functions + SmallVector NewDefs; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + assert(!decls.functionObject.empty()); + if (decls.functionObject != "jl_fptr_args" && + decls.functionObject != "jl_fptr_sparam" && + decls.functionObject != "jl_f_opaque_closure_call") + NewDefs.push_back(decls.functionObject); + if (!decls.specFunctionObject.empty()) + NewDefs.push_back(decls.specFunctionObject); + } + auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); + + size_t nextaddr = 0; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + jl_callptr_t addr; + bool isspecsig = false; + if (decls.functionObject == "jl_fptr_args") { + addr = jl_fptr_args_addr; + } + else if (decls.functionObject == "jl_fptr_sparam") { + addr = jl_fptr_sparam_addr; + } + else if (decls.functionObject == "jl_f_opaque_closure_call") { + addr = jl_f_opaque_closure_call_addr; + } + else { + assert(NewDefs[nextaddr] == decls.functionObject); + addr = (jl_callptr_t)Addrs[nextaddr++]; + assert(addr); + isspecsig = true; + } + if (!decls.specFunctionObject.empty()) { + void *prev_specptr = nullptr; + assert(NewDefs[nextaddr] == decls.specFunctionObject); + void *spec = (void*)Addrs[nextaddr++]; + assert(spec); + if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { + // only set specsig and invoke if we were the first to set specptr + jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); + // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr + // either assumes that specptr was null, doesn't care about specptr, + // or will wait until specsigflags has 0b10 set before reloading invoke + jl_atomic_store_release(&this_code->invoke, addr); + jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); + } + else { + //someone else beat us, don't commit any results + while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + addr = jl_atomic_load_relaxed(&this_code->invoke); } - addr = jl_atomic_load_relaxed(&this_code->invoke); } - } else { - jl_callptr_t prev_invoke = NULL; - // Allow replacing addr if it is either NULL or our special waiting placeholder. - if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - addr = prev_invoke; - //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other - //known lesser function) + else { + jl_callptr_t prev_invoke = nullptr; + // Allow replacing addr if it is either nullptr or our special waiting placeholder. + if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + addr = prev_invoke; + //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other + //known lesser function) + } } } + invokenames.erase(it); + complete_graph.erase(this_code); } - if (this_code == codeinst) - fptr = addr; - i++; + linkready.clear(); + engine_wait.notify_all(); + } + else while (threads_in_compiler_phase) { + lock.wait(engine_wait); } - if (i > jl_timing_print_limit) - jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... <%d methods truncated>", i - 10); +} - uint64_t end_time = 0; - if (timed) - end_time = jl_hrtime(); - - // If logging of the compilation stream is enabled, - // then dump the method-instance specialization type to the stream - jl_method_instance_t *mi = codeinst->def; - if (jl_is_method(mi->def.method)) { - auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); - if (stream) { - ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); - jl_static_show((JL_STREAM*)stream, mi->specTypes); - ios_printf(stream, "\"\n"); +static void jl_emit_codeinst_to_jit( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + { // lock scope + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; + } + JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); + // emit the code in LLVM IR form to the new context + jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context + params.cache = true; + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; + orc::ThreadSafeModule result_m = + jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.DL, params.TargetTriple); + jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints + if (!result_m) + return; + { // drop lock before acquiring engine_lock + auto release = std::move(params.tsctx_lock); + } + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; // destroy everything + invokenames[codeinst] = std::move(decls); + complete_emit(codeinst); + params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock + int waiting = jl_analyze_workqueue(codeinst, params); + if (waiting) { + auto release = std::move(params.tsctx_lock); // unlock again before moving from it + incompletemodules.insert(std::pair(codeinst, std::tuple(std::move(params), waiting))); + } + else { + finish_params(result_m.getModuleUnlocked(), params); + } + emittedmodules[codeinst] = std::move(result_m); +} + +static void recursive_compile_graph( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + jl_emit_codeinst_to_jit(codeinst, src); + DenseSet Seen; + SmallVector workqueue; + workqueue.push_back(codeinst); + // if any edges were incomplete, try to complete them now + while (!workqueue.empty()) { + auto this_code = workqueue.pop_back_val(); + if (Seen.insert(this_code).second) { + if (this_code != codeinst) + jl_emit_codeinst_to_jit(this_code, nullptr); // contains safepoints + jl_unique_gcsafe_lock lock(engine_lock); + auto edges = complete_graph.find(this_code); + if (edges != complete_graph.end()) { + workqueue.append(edges->second); + } } } - return fptr; } +// this generates llvm code for the lambda info +// and adds the result to the jitlayers +// (and the shadow module), +// and generates code for it +static jl_callptr_t _jl_compile_codeinst( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + recursive_compile_graph(codeinst, src); + jl_compile_codeinst_now(codeinst); + return jl_atomic_load_acquire(&codeinst->invoke); +} + + const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms); // compile a C-callable alias @@ -415,42 +727,40 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * orc::ThreadSafeModule backing; if (into == NULL) { if (!pparams) { - ctx = jl_ExecutionEngine->acquireContext(); + ctx = jl_ExecutionEngine->makeContext(); } backing = jl_create_ts_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->DL : jl_ExecutionEngine->getDataLayout(), pparams ? pparams->TargetTriple : jl_ExecutionEngine->getTargetTriple()); into = &backing; } - auto target_info = into->withModuleDo([&](Module &M) { - return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); - }); - jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second)); - params.imaging_mode = imaging_default(); - params.debug_level = jl_options.debug_level; - if (pparams == NULL) - pparams = ¶ms; - assert(pparams->tsctx.getContext() == into->getContext().getContext()); - const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams); bool success = true; - if (!sysimg) { - JL_LOCK(&jl_ExecutionEngine->jitlock); - if (jl_ExecutionEngine->getGlobalValueAddress(name)) { - success = false; - } - if (success && p == NULL) { - jl_jit_globals(params.global_targets); - assert(params.workqueue.empty()); - if (params._shared_module) { - jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); - jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + { + auto Lock = into->getContext().getLock(); + Module *M = into->getModuleUnlocked(); + jl_codegen_params_t params(into->getContext(), M->getDataLayout(), Triple(M->getTargetTriple())); + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; + if (pparams == NULL) + pparams = ¶ms; + assert(pparams->tsctx.getContext() == into->getContext().getContext()); + const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams); + if (!sysimg) { + jl_unique_gcsafe_lock lock(extern_c_lock); + if (jl_ExecutionEngine->getGlobalValueAddress(name)) { + success = false; + } + if (success && p == NULL) { + jl_jit_globals(params.global_targets); + assert(params.workqueue.empty()); + if (params._shared_module) { + jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); // safepoint + jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + } + } + if (success && llvmmod == NULL) { + jl_ExecutionEngine->optimizeDLSyms(*M); // safepoint + jl_ExecutionEngine->addModule(std::move(*into)); } } - if (success && llvmmod == NULL) { - into->withModuleDo([&](Module &M) { - jl_ExecutionEngine->optimizeDLSyms(M); - }); - jl_ExecutionEngine->addModule(std::move(*into)); - } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC } if (timed) { if (measure_compile_time_enabled) { @@ -459,9 +769,6 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * } ct->reentrant_timing &= ~1ull; } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return success; } @@ -512,18 +819,13 @@ extern "C" JL_DLLEXPORT_CODEGEN int jl_compile_codeinst_impl(jl_code_instance_t *ci) { int newly_compiled = 0; - if (jl_atomic_load_relaxed(&ci->invoke) != NULL) { - return newly_compiled; - } - JL_LOCK(&jl_ExecutionEngine->jitlock); if (jl_atomic_load_relaxed(&ci->invoke) == NULL) { ++SpecFPtrCount; uint64_t start = jl_typeinf_timing_begin(); - _jl_compile_codeinst(ci, NULL, *jl_ExecutionEngine->getContext()); + _jl_compile_codeinst(ci, NULL); jl_typeinf_timing_end(start, 0); newly_compiled = 1; } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC return newly_compiled; } @@ -541,38 +843,39 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - JL_LOCK(&jl_ExecutionEngine->jitlock); - if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - jl_method_t *def = unspec->def->def.method; - if (jl_is_method(def)) { - src = (jl_code_info_t*)def->source; - if (src && (jl_value_t*)src != jl_nothing) - src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); - } - else { - jl_method_instance_t *mi = unspec->def; - jl_code_instance_t *uninferred = jl_cached_uninferred( - jl_atomic_load_relaxed(&mi->cache), 1); - assert(uninferred); - src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); - assert(src); - } - if (src) { + jl_code_info_t *src = NULL; + JL_GC_PUSH1(&src); + jl_method_t *def = unspec->def->def.method; + if (jl_is_method(def)) { + src = (jl_code_info_t*)def->source; + if (src && (jl_value_t*)src != jl_nothing) + src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); + } + else { + jl_method_instance_t *mi = unspec->def; + jl_code_instance_t *uninferred = jl_cached_uninferred( + jl_atomic_load_relaxed(&mi->cache), 1); + assert(uninferred); + src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); + assert(src); + } + if (src) { + // TODO: first prepare recursive_compile_graph(unspec, src) before taking this lock to avoid recursion? + JL_LOCK(&jitlock); // TODO: use a better lock + if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { assert(jl_is_code_info(src)); ++UnspecFPtrCount; jl_debuginfo_t *debuginfo = src->debuginfo; jl_atomic_store_release(&unspec->debuginfo, debuginfo); // n.b. this assumes the field was previously NULL, which is not entirely true jl_gc_wb(unspec, debuginfo); - _jl_compile_codeinst(unspec, src, *jl_ExecutionEngine->getContext()); + _jl_compile_codeinst(unspec, src); } - jl_callptr_t null = nullptr; - // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort - jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); - JL_GC_POP(); + JL_UNLOCK(&jitlock); // Might GC } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC + JL_GC_POP(); + jl_callptr_t null = nullptr; + // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort + jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); if (timed) { if (measure_compile_time_enabled) { auto end = jl_hrtime(); @@ -634,8 +937,8 @@ static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT static constexpr size_t N_optlevels = 4; -static Expected selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { - TSM.withModuleDo([](Module &M) { +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT { size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); do { if (jl_generating_output()) { @@ -661,7 +964,10 @@ static Expected selectOptLevel(orc::ThreadSafeModule TSM, opt_level = std::min(opt_level, N_optlevels - 1); M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); }); - return std::move(TSM); + return TSM; +} +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return selectOptLevel(std::move(TSM)); } void jl_register_jit_object(const object::ObjectFile &debugObj, @@ -699,8 +1005,8 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { { std::lock_guard lock(PluginMutex); assert(PendingObjs.count(&MR) == 0); - PendingObjs[&MR] = std::unique_ptr( - new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}}); + PendingObjs[&MR] = std::unique_ptr(new JITObjectInfo{ + std::move(NewBuffer), std::move(NewObj), {}}); } } @@ -870,7 +1176,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. -std::unique_ptr createJITLinkMemoryManager() { +std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT { #if JL_LLVM_VERSION < 160000 return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); #else @@ -900,7 +1206,7 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { } }; -RTDyldMemoryManager* createRTDyldMemoryManager(void); +RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT; // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { @@ -909,7 +1215,10 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { public: ForwardingMemoryManager(std::shared_ptr MemMgr) : MemMgr(MemMgr) {} - virtual ~ForwardingMemoryManager() = default; + ForwardingMemoryManager(ForwardingMemoryManager &) = delete; + virtual ~ForwardingMemoryManager() { + assert(!MemMgr); + } virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName) override { @@ -947,7 +1256,11 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { return MemMgr->deregisterEHFrames(); } virtual bool finalizeMemory(std::string *ErrMsg = nullptr) override { - return MemMgr->finalizeMemory(ErrMsg); + bool b = false; + if (MemMgr.use_count() == 2) + b = MemMgr->finalizeMemory(ErrMsg); + MemMgr.reset(); + return b; } virtual void notifyObjectLoaded(RuntimeDyld &RTDyld, const object::ObjectFile &Obj) override { @@ -955,10 +1268,10 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { } }; - -void registerRTDyldJITObject(const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &L, - const std::shared_ptr &MemMgr) +#ifndef JL_USE_JITLINK +static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR, + const object::ObjectFile &Object, + const RuntimeDyld::LoadedObjectInfo &L) { StringMap loadedSections; for (const object::SectionRef &lSection : Object.sections()) { @@ -980,6 +1293,8 @@ void registerRTDyldJITObject(const object::ObjectFile &Object, auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op. jl_register_jit_object(DebugObject.getBinary() ? *DebugObject.getBinary() : Object, getLoadAddress); } +#endif + namespace { static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); @@ -1078,9 +1393,6 @@ namespace { fixupTM(*TM); return std::unique_ptr(TM); } -} // namespace - -namespace { typedef NewPM PassManager; @@ -1131,14 +1443,14 @@ namespace { }; template - struct OptimizerT { - OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { + struct sizedOptimizerT { + sizedOptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { for (size_t i = 0; i < N; i++) { PMs[i] = std::make_unique>>(PMCreator(TM, i, printers, llvm_printing_mutex)); } } - OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); assert(PoolIdx < N && "Invalid optimization pool index"); @@ -1243,12 +1555,23 @@ namespace { llvm_unreachable("optlevel is between 0 and 3!"); } }); - return Expected{std::move(TSM)}; + return TSM; } private: std::array>>, N> PMs; }; + // shim for converting a unique_ptr to a TransformFunction to a TransformFunction + template + struct IRTransformRef { + IRTransformRef(T &transform) : transform(transform) {} + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return transform(std::move(TSM), R); + } + private: + T &transform; + }; + template struct CompilerT : orc::IRCompileLayer::IRCompiler { @@ -1264,7 +1587,8 @@ namespace { size_t PoolIdx; if (auto opt_level = M.getModuleFlag("julia.optlevel")) { PoolIdx = cast(cast(opt_level)->getValue())->getZExtValue(); - } else { + } + else { PoolIdx = jl_options.opt_level; } assert(PoolIdx < N && "Invalid optimization level for compiler!"); @@ -1273,74 +1597,89 @@ namespace { std::array>>, N> TMs; }; +} - struct JITPointersT { - - JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT - : SharedBytes(SharedBytes), Lock(Lock) {} +struct JuliaOJIT::OptimizerT { + OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) + : opt(TM, printers, llvm_printing_mutex) {} + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } +private: + struct sizedOptimizerT opt; +}; - Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - std::lock_guard locked(Lock); - for (auto &GV : make_early_inc_range(M.globals())) { - if (auto *Shared = getSharedBytes(GV)) { - ++InternedGlobals; - GV.replaceAllUsesWith(Shared); - GV.eraseFromParent(); - } +struct JuliaOJIT::JITPointersT { + JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT + : SharedBytes(SharedBytes), Lock(Lock) {} + + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); } + } - // Windows needs some inline asm to help - // build unwind tables, if they have any functions to decorate - if (!M.functions().empty()) - jl_decorate_module(M); - }); - return std::move(TSM); - } + // Windows needs some inline asm to help + // build unwind tables, if they have any functions to decorate + if (!M.functions().empty()) + jl_decorate_module(M); + }); + return TSM; + } + Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return operator()(std::move(TSM)); + } - private: - // optimize memory by turning long strings into memoized copies, instead of - // making a copy per object file of output. - // we memoize them using a StringSet with a custom-alignment allocator - // to ensure they are properly aligned - Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { - // We could probably technically get away with - // interning even external linkage globals, - // as long as they have global unnamedaddr, - // but currently we shouldn't be emitting those - // except in imaging mode, and we don't want to - // do this optimization there. - if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { - return nullptr; - } - if (!GV.hasInitializer()) { - return nullptr; - } - if (!GV.isConstant()) { - return nullptr; - } - auto CDS = dyn_cast(GV.getInitializer()); - if (!CDS) { - return nullptr; - } - StringRef Data = CDS->getRawDataValues(); - if (Data.size() < 16) { - // Cutoff, since we don't want to intern small strings - return nullptr; - } - Align Required = GV.getAlign().valueOrOne(); - Align Preferred = MaxAlignedAlloc::alignment(Data.size()); - if (Required > Preferred) - return nullptr; - StringRef Interned = SharedBytes.insert(Data).first->getKey(); - assert(llvm::isAddrAligned(Preferred, Interned.data())); - return literal_static_pointer_val(Interned.data(), GV.getType()); +private: + // optimize memory by turning long strings into memoized copies, instead of + // making a copy per object file of output. + // we memoize them using a StringSet with a custom-alignment allocator + // to ensure they are properly aligned + Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { + // We could probably technically get away with + // interning even external linkage globals, + // as long as they have global unnamedaddr, + // but currently we shouldn't be emitting those + // except in imaging mode, and we don't want to + // do this optimization there. + if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { + return nullptr; } + if (!GV.hasInitializer()) { + return nullptr; + } + if (!GV.isConstant()) { + return nullptr; + } + auto CDS = dyn_cast(GV.getInitializer()); + if (!CDS) { + return nullptr; + } + StringRef Data = CDS->getRawDataValues(); + if (Data.size() < 16) { + // Cutoff, since we don't want to intern small strings + return nullptr; + } + Align Required = GV.getAlign().valueOrOne(); + Align Preferred = MaxAlignedAlloc::alignment(Data.size()); + if (Required > Preferred) + return nullptr; + StringRef Interned = SharedBytes.insert(Data).first->getKey(); + assert(llvm::isAddrAligned(Preferred, Interned.data())); + return literal_static_pointer_val(Interned.data(), GV.getType()); + } - SharedBytesT &SharedBytes; - std::mutex &Lock; - }; -} + SharedBytesT &SharedBytes; + std::mutex &Lock; +}; struct JuliaOJIT::DLSymOptimizer { @@ -1362,20 +1701,24 @@ struct JuliaOJIT::DLSymOptimizer { #undef INIT_RUNTIME_LIBRARY } + ~DLSymOptimizer() JL_NOTSAFEPOINT = default; - void *lookup_symbol(void *libhandle, const char *fname) { + void *lookup_symbol(void *libhandle, const char *fname) JL_NOTSAFEPOINT { void *addr; jl_dlsym(libhandle, fname, &addr, 0); return addr; } - void *lookup(const char *libname, const char *fname) { + void *lookup(const char *libname, const char *fname) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { StringRef lib(libname); StringRef f(fname); std::lock_guard lock(symbols_mutex); auto uit = user_symbols.find(lib); if (uit == user_symbols.end()) { + jl_task_t *ct = jl_current_task; + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); void *handle = jl_get_library_(libname, 0); + jl_gc_unsafe_leave(ct->ptls, gc_state); if (!handle) return nullptr; uit = user_symbols.insert(std::make_pair(lib, std::make_pair(handle, StringMap()))).first; @@ -1390,7 +1733,7 @@ struct JuliaOJIT::DLSymOptimizer { return handle; } - void *lookup(uintptr_t libidx, const char *fname) { + void *lookup(uintptr_t libidx, const char *fname) JL_NOTSAFEPOINT { std::lock_guard lock(symbols_mutex); runtime_symbols.resize(std::max(runtime_symbols.size(), libidx + 1)); auto it = runtime_symbols[libidx].second.find(fname); @@ -1402,7 +1745,7 @@ struct JuliaOJIT::DLSymOptimizer { return handle; } - void operator()(Module &M) { + void operator()(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { for (auto &GV : M.globals()) { auto Name = GV.getName(); if (Name.starts_with("jlplt") && Name.ends_with("got")) { @@ -1518,7 +1861,7 @@ struct JuliaOJIT::DLSymOptimizer { bool named; }; -void optimizeDLSyms(Module &M) { +void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { JuliaOJIT::DLSymOptimizer(true)(M); } @@ -1552,10 +1895,6 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { return jl_data_layout; } -#ifdef _COMPILER_ASAN_ENABLED_ -int64_t ___asan_globals_registered; -#endif - JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), @@ -1564,34 +1903,27 @@ JuliaOJIT::JuliaOJIT() JD(ES.createBareJITDylib("JuliaOJIT")), ExternalJD(ES.createBareJITDylib("JuliaExternal")), DLSymOpt(std::make_unique(false)), - ContextPool([](){ - auto ctx = std::make_unique(); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(*ctx); - #endif - return orc::ThreadSafeContext(std::move(ctx)); - }), #ifdef JL_USE_JITLINK MemMgr(createJITLinkMemoryManager()), ObjectLayer(ES, *MemMgr), - CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), #else MemMgr(createRTDyldMemoryManager()), - ObjectLayer( + UnlockedObjectLayer( ES, [this]() { std::unique_ptr result(new ForwardingMemoryManager(MemMgr)); return result; } ), - LockLayer(ObjectLayer), - CompileLayer(ES, LockLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + ObjectLayer(UnlockedObjectLayer), #endif - JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))), - OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers, llvm_printing_mutex))), - OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)) + CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + JITPointers(std::make_unique(SharedBytes, RLST_mutex)), + JITPointersLayer(ES, CompileLayer, IRTransformRef(*JITPointers)), + Optimizers(std::make_unique(*TM, PrintLLVMTimers, llvm_printing_mutex)), + OptimizeLayer(ES, JITPointersLayer, IRTransformRef(*Optimizers)), + OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)) { - JL_MUTEX_INIT(&this->jitlock, "JuliaOJIT"); #ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) // When dynamically linking against LLVM, use our custom EH frame registration code @@ -1606,12 +1938,7 @@ JuliaOJIT::JuliaOJIT() ObjectLayer.addPlugin(std::make_unique()); ObjectLayer.addPlugin(std::make_unique(jit_bytes_size)); #else - ObjectLayer.setNotifyLoaded( - [this](orc::MaterializationResponsibility &MR, - const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &LO) { - registerRTDyldJITObject(Object, LO, MemMgr); - }); + UnlockedObjectLayer.setNotifyLoaded(registerRTDyldJITObject); #endif std::string ErrorStr; @@ -1741,19 +2068,34 @@ JuliaOJIT::JuliaOJIT() #endif cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt))); #endif +#if JL_LLVM_VERSION < 190000 #ifdef _COMPILER_ASAN_ENABLED_ + // this is a hack to work around a bad assertion: + // /workspace/srcdir/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp:3028: llvm::Error llvm::orc::ExecutionSession::OL_notifyResolved(llvm::orc::MaterializationResponsibility&, const SymbolMap&): Assertion `(KV.second.getFlags() & ~JITSymbolFlags::Common) == (I->second & ~JITSymbolFlags::Common) && "Resolving symbol with incorrect flags"' failed. + // hopefully fixed upstream by e7698a13e319a9919af04d3d693a6f6ea7168a44 + static int64_t jl___asan_globals_registered; orc::SymbolMap asan_crt; #if JL_LLVM_VERSION >= 170000 - asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&___asan_globals_registered), JITSymbolFlags::Exported}; + asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&jl___asan_globals_registered), JITSymbolFlags::Common | JITSymbolFlags::Exported}; #else - asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported); + asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&jl___asan_globals_registered, JITSymbolFlags::Common | JITSymbolFlags::Exported); #endif cantFail(JD.define(orc::absoluteSymbols(asan_crt))); #endif +#endif } JuliaOJIT::~JuliaOJIT() = default; +ThreadSafeContext JuliaOJIT::makeContext() +{ + auto ctx = std::make_unique(); + #if JL_LLVM_VERSION < 170000 + SetOpaquePointer(*ctx); + #endif + return orc::ThreadSafeContext(std::move(ctx)); +} + orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) { std::string MangleName = getMangledName(Name); @@ -1773,40 +2115,32 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) { JL_TIMING(LLVM_JIT, JIT_Total); ++ModulesAdded; -#ifndef JL_USE_JITLINK - orc::SymbolLookupSet NewExports; - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - for (auto &F : M.global_values()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Name = ES.intern(getMangledName(F.getName())); - NewExports.add(std::move(Name)); - } - } - assert(!verifyLLVMIR(M)); - }); -#endif - - auto Err = OptSelLayer.add(JD, std::move(TSM)); + TSM = selectOptLevel(std::move(TSM)); + TSM = (*Optimizers)(std::move(TSM)); + TSM = (*JITPointers)(std::move(TSM)); + auto Lock = TSM.getContext().getLock(); + Module &M = *TSM.getModuleUnlocked(); + // Treat this as if one of the passes might contain a safepoint + // even though that shouldn't be the case and might be unwise + Expected> Obj = CompileLayer.getCompiler()(M); + if (!Obj) { + ES.reportError(Obj.takeError()); + errs() << "Failed to add module to JIT!\n"; + errs() << "Dumping failing module\n" << M << "\n"; + return; + } + { auto release = std::move(Lock); } + auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj)); if (Err) { ES.reportError(std::move(Err)); - errs() << "Failed to add module to JIT!\n"; + errs() << "Failed to add objectfile to JIT!\n"; abort(); } -#ifndef JL_USE_JITLINK - // force eager compilation (for now), due to memory management specifics - // (can't handle compilation recursion) - auto Lookups = ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports); - if (!Lookups) { - ES.reportError(Lookups.takeError()); - errs() << "Failed to lookup symbols in module!\n"; - } -#endif } Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) { - if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error - { + if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error { if (M.getDataLayout().isDefault()) M.setDataLayout(DL); if (M.getDataLayout() != DL) @@ -1815,24 +2149,29 @@ Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, M.getDataLayout().getStringRepresentation() + " (module) vs " + DL.getStringRepresentation() + " (jit)", inconvertibleErrorCode()); - + // OrcJIT requires that all modules / files have unique names: + M.setModuleIdentifier((M.getModuleIdentifier() + Twine("-") + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str()); return Error::success(); - })) + })) return Err; + //if (ShouldOptimize) + // return OptimizeLayer.add(JD, std::move(TSM)); return CompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); } Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) { assert(Obj && "Can not add null object"); -#ifdef JL_USE_JITLINK + // OrcJIT requires that all modules / files have unique names: + // https://llvm.org/doxygen/namespacellvm_1_1orc.html#a1f5a1bc60c220cdccbab0f26b2a425e1 + // so we have to force a copy here + std::string Name = ("jitted-" + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str(); + Obj = Obj->getMemBufferCopy(Obj->getBuffer(), Name); return ObjectLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); -#else - return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); -#endif } SmallVector JuliaOJIT::findSymbols(ArrayRef Names) { + // assert(MemMgr.use_count() == 1); (true single-threaded, but slightly race-y to assert it with concurrent threads) DenseMap Unmangled; orc::SymbolLookupSet Exports; for (StringRef Name : Names) { @@ -1978,6 +2317,7 @@ void JuliaOJIT::enableJITDebuggingSupport() addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBAllocAction); auto registerJITLoaderGDBWrapper = addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBWrapper); cantFail(JD.define(orc::absoluteSymbols(GDBFunctions))); + (void)registerJITLoaderGDBWrapper; if (TM->getTargetTriple().isOSBinFormatMachO()) ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple()))); #ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794 @@ -2013,12 +2353,12 @@ void JuliaOJIT::enableOProfileJITEventListener() void JuliaOJIT::enablePerfJITEventListener() { #if JL_LLVM_VERSION >= 180000 - orc::SymbolMap PerfFunctions; - auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart); - auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd); - auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl); - cantFail(JD.define(orc::absoluteSymbols(PerfFunctions))); if (TM->getTargetTriple().isOSBinFormatELF()) { + orc::SymbolMap PerfFunctions; + auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart); + auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd); + auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl); + cantFail(JD.define(orc::absoluteSymbols(PerfFunctions))); ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create())); //ObjectLayer.addPlugin(cantFail(PerfSupportPlugin::Create( // ES.getExecutorProcessControl(), *JD, true, true))); @@ -2032,7 +2372,7 @@ void JuliaOJIT::enablePerfJITEventListener() void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) { if (L) - ObjectLayer.registerJITEventListener(*L); + UnlockedObjectLayer.registerJITEventListener(*L); } void JuliaOJIT::enableJITDebuggingSupport() { @@ -2071,7 +2411,7 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV) size_t JuliaOJIT::getTotalBytes() const { - auto bytes = jit_bytes_size.load(std::memory_order_relaxed); + auto bytes = jl_atomic_load_relaxed(&jit_bytes_size); #ifndef JL_USE_JITLINK size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT; bytes += getRTDyldMemoryManagerTotalBytes(MemMgr.get()); @@ -2081,7 +2421,7 @@ size_t JuliaOJIT::getTotalBytes() const void JuliaOJIT::addBytes(size_t bytes) { - jit_bytes_size.fetch_add(bytes, std::memory_order_relaxed); + jl_atomic_fetch_add_relaxed(&jit_bytes_size, bytes); } void JuliaOJIT::printTimers() @@ -2326,74 +2666,6 @@ static void jl_decorate_module(Module &M) { #undef ASM_USES_ELF } -#ifndef JL_USE_JITLINK -// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - SmallVectorImpl &Stack) -{ - // First check if the TSM is empty (already compiled) - if (!M) - return 0; - // Next check and record if it is on the stack somewhere - { - auto &Id = Queued[&M]; - if (Id) - return Id; - Stack.push_back(&M); - Id = Stack.size(); - } - // Finally work out the SCC - int depth = Stack.size(); - int MergeUp = depth; - SmallVector Children; - M.withModuleDo([&](Module &m) JL_NOTSAFEPOINT { - for (auto &F : m.global_objects()) { - if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Callee = NewExports.find(F.getName()); - if (Callee != NewExports.end()) { - auto *CM = Callee->second; - if (*CM && CM != &M) { - auto Down = Queued.find(CM); - if (Down != Queued.end()) - MergeUp = std::min(MergeUp, Down->second); - else - Children.push_back(CM); - } - } - } - } - }); - assert(MergeUp > 0); - for (auto *CM : Children) { - int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack); - assert(Down <= (int)Stack.size()); - if (Down) - MergeUp = std::min(MergeUp, Down); - } - if (MergeUp < depth) - return MergeUp; - while (1) { - // Not in a cycle (or at the top of it) - // remove SCC state and merge every CM from the cycle into M - orc::ThreadSafeModule *CM = Stack.back(); - auto it = Queued.find(CM); - assert(it->second == (int)Stack.size()); - Queued.erase(it); - Stack.pop_back(); - if ((int)Stack.size() < depth) { - assert(&M == CM); - break; - } - jl_merge_module(M, std::move(*CM)); - } - jl_ExecutionEngine->addModule(std::move(M)); - return 0; -} -#endif - // helper function for adding a DLLImport (dlsym) address to the execution engine void add_named_global(StringRef name, void *addr) { diff --git a/src/jitlayers.h b/src/jitlayers.h index f4b9a6ea5395a..ba4ac3081795e 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -69,7 +69,6 @@ using namespace llvm; extern "C" jl_cgparams_t jl_default_cgparams; -extern arraylist_t new_invokes; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef) @@ -154,11 +153,11 @@ struct jl_locked_stream { std::unique_lock lck; ios_t *&stream; - lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT + lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lck(mutex), stream(stream) {} lock(lock&) = delete; lock(lock&&) JL_NOTSAFEPOINT = default; - ~lock() JL_NOTSAFEPOINT = default; + ~lock() JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT = default; ios_t *&operator*() JL_NOTSAFEPOINT { return stream; @@ -177,8 +176,8 @@ struct jl_locked_stream { } }; - jl_locked_stream() JL_NOTSAFEPOINT = default; - ~jl_locked_stream() JL_NOTSAFEPOINT = default; + jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER = default; + ~jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; lock operator*() JL_NOTSAFEPOINT { return lock(mutex, stream); @@ -210,12 +209,12 @@ struct jl_codegen_call_target_t { jl_returninfo_t::CallingConv cc; unsigned return_roots; llvm::Function *decl; + llvm::Function *oc; bool specsig; }; typedef SmallVector, 0> jl_workqueue_t; -// TODO DenseMap? -typedef std::map> jl_compiled_functions_t; + typedef std::list> CallFrames; struct jl_codegen_params_t { orc::ThreadSafeContext tsctx; @@ -229,7 +228,6 @@ struct jl_codegen_params_t { typedef StringMap SymMapGV; // outputs jl_workqueue_t workqueue; - jl_compiled_functions_t compiled_functions; std::map global_targets; std::map, GlobalVariable*> external_fns; std::map ditypes; @@ -292,13 +290,20 @@ enum CompilationPolicy { Extern = 1, }; -void jl_compile_workqueue( - jl_codegen_params_t ¶ms, - CompilationPolicy policy); - Function *jl_cfunction_object(jl_function_t *f, jl_value_t *rt, jl_tupletype_t *argt, jl_codegen_params_t ¶ms); +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT; +void emit_specsig_to_fptr1( + Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, + jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, + size_t nargs, + jl_codegen_params_t ¶ms, + Function *target, + size_t min_world, size_t max_world) JL_NOTSAFEPOINT; +Function *get_or_emit_fptr1(StringRef Name, Module *M) JL_NOTSAFEPOINT; +void jl_init_function(Function *F, const Triple &TT) JL_NOTSAFEPOINT; + void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT @@ -371,6 +376,11 @@ using OptimizerResultT = Expected; using SharedBytesT = StringSet::MapEntryTy)>>; class JuliaOJIT { +private: + // any verification the user wants to do when adding an OwningResource to the pool + template + static void verifyResource(AnyT &resource) JL_NOTSAFEPOINT { } + static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { assert(context.getContext()); } public: #ifdef JL_USE_JITLINK typedef orc::ObjectLinkingLayer ObjLayerT; @@ -385,13 +395,13 @@ class JuliaOJIT { std::unique_ptr O) override { JL_TIMING(LLVM_JIT, JIT_Link); #ifndef JL_USE_JITLINK - std::lock_guard lock(EmissionMutex); + std::lock_guard lock(EmissionMutex); #endif BaseLayer.emit(std::move(R), std::move(O)); } private: orc::ObjectLayer &BaseLayer; - std::mutex EmissionMutex; + std::recursive_mutex EmissionMutex; }; #endif typedef orc::IRCompileLayer CompileLayerT; @@ -420,11 +430,16 @@ class JuliaOJIT { : pool(pool), resource(std::move(resource)) {} OwningResource(const OwningResource &) = delete; OwningResource &operator=(const OwningResource &) = delete; - OwningResource(OwningResource &&) JL_NOTSAFEPOINT = default; + OwningResource(OwningResource &&other) JL_NOTSAFEPOINT + : pool(other.pool), resource(std::move(other.resource)) { + other.resource.reset(); + } OwningResource &operator=(OwningResource &&) JL_NOTSAFEPOINT = default; ~OwningResource() JL_NOTSAFEPOINT { // _LEAVE - if (resource) + if (resource) { + verifyResource(*resource); pool.release(std::move(*resource)); + } } ResourceT release() JL_NOTSAFEPOINT { ResourceT res(std::move(*resource)); @@ -510,7 +525,11 @@ class JuliaOJIT { std::unique_ptr mutex; }; + typedef ResourcePool> ContextPoolT; + struct DLSymOptimizer; + struct OptimizerT; + struct JITPointersT; #ifndef JL_USE_JITLINK void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT; @@ -528,7 +547,7 @@ class JuliaOJIT { orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT; void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; - void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; //Methods for the C API Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, @@ -552,15 +571,7 @@ class JuliaOJIT { uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; - auto getContext() JL_NOTSAFEPOINT { - return *ContextPool; - } - orc::ThreadSafeContext acquireContext() { // JL_NOTSAFEPOINT_ENTER? - return ContextPool.acquire(); - } - void releaseContext(orc::ThreadSafeContext &&ctx) { // JL_NOTSAFEPOINT_LEAVE? - ContextPool.release(std::move(ctx)); - } + orc::ThreadSafeContext makeContext() JL_NOTSAFEPOINT; const DataLayout& getDataLayout() const JL_NOTSAFEPOINT; // TargetMachine pass-through methods @@ -576,22 +587,21 @@ class JuliaOJIT { void addBytes(size_t bytes) JL_NOTSAFEPOINT; void printTimers() JL_NOTSAFEPOINT; - jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_emitted_mi_name_stream; } - jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_compiles_stream; } - jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_llvm_opt_stream; } std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; - // Note that this is a safepoint due to jl_get_library_ and jl_dlsym calls - void optimizeDLSyms(Module &M); - - jl_mutex_t jitlock; + // Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls + // but may be called from inside safe-regions due to jit compilation locks + void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; private: @@ -618,20 +628,20 @@ class JuliaOJIT { std::mutex llvm_printing_mutex{}; SmallVector, 0> PrintLLVMTimers; - ResourcePool> ContextPool; - - std::atomic jit_bytes_size{0}; -#ifndef JL_USE_JITLINK - const std::shared_ptr MemMgr; -#else + _Atomic(size_t) jit_bytes_size{0}; + _Atomic(size_t) jitcounter{0}; +#ifdef JL_USE_JITLINK const std::unique_ptr MemMgr; -#endif ObjLayerT ObjectLayer; -#ifndef JL_USE_JITLINK - LockLayerT LockLayer; +#else + const std::shared_ptr MemMgr; // shared_ptr protected by LockLayerT.EmissionMutex + ObjLayerT UnlockedObjectLayer; + LockLayerT ObjectLayer; #endif CompileLayerT CompileLayer; + std::unique_ptr JITPointers; JITPointersLayerT JITPointersLayer; + std::unique_ptr Optimizers; OptimizeLayerT OptimizeLayer; OptSelLayerT OptSelLayer; }; diff --git a/src/julia.h b/src/julia.h index 7bb5f31eda708..168ba0deff1ec 100644 --- a/src/julia.h +++ b/src/julia.h @@ -426,8 +426,8 @@ typedef struct _jl_opaque_closure_t { jl_value_t *captures; size_t world; jl_method_t *source; - jl_fptr_args_t invoke; - void *specptr; + jl_fptr_args_t invoke; // n.b. despite the similar name, this is not an invoke ABI (jl_call_t / julia.call2), but rather the fptr1 (jl_fptr_args_t / julia.call) ABI + void *specptr; // n.b. despite the similarity in field name, this is not arbitrary private data for jlcall, but rather the codegen ABI for specsig, and is mandatory if specsig is valid } jl_opaque_closure_t; // This type represents an executable operation @@ -475,7 +475,7 @@ typedef struct _jl_code_instance_t { // & 0b100 == From image _Atomic(uint8_t) precompile; // if set, this will be added to the output system image uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key - _Atomic(jl_callptr_t) invoke; // jlcall entry point + _Atomic(jl_callptr_t) invoke; // jlcall entry point usually, but if this codeinst belongs to an OC Method, then this is an jl_fptr_args_t fptr1 instead, unless it is not, because it is a special token object instead union _jl_generic_specptr_t { _Atomic(void*) fptr; _Atomic(jl_fptr_args_t) fptr1; @@ -2339,7 +2339,13 @@ JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_RO extern JL_DLLIMPORT int jl_task_gcstack_offset; extern JL_DLLIMPORT int jl_task_ptls_offset; +#ifdef __cplusplus +} +#endif #include "julia_locks.h" // requires jl_task_t definition +#ifdef __cplusplus +extern "C" { +#endif // Return the exception currently being handled, or `jl_nothing`. // diff --git a/src/julia_atomics.h b/src/julia_atomics.h index c094afcc54cd5..d05f0fafab28f 100644 --- a/src/julia_atomics.h +++ b/src/julia_atomics.h @@ -103,12 +103,12 @@ enum jl_memory_order { // this wrong thus we include the correct definitions here (with implicit // conversion), instead of using the macro version template -T jl_atomic_load(std::atomic *ptr) +T jl_atomic_load(const std::atomic *ptr) { return std::atomic_load(ptr); } template -T jl_atomic_load_explicit(std::atomic *ptr, std::memory_order order) +T jl_atomic_load_explicit(const std::atomic *ptr, std::memory_order order) { return std::atomic_load_explicit(ptr, order); } diff --git a/src/julia_internal.h b/src/julia_internal.h index 82c91c6d073af..bb8169c6e5f9e 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1715,13 +1715,14 @@ JL_DLLEXPORT int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTS JL_DLLEXPORT jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0); JL_DLLEXPORT jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0); +JL_DLLEXPORT jl_value_t *jl_argtype_without_function(jl_value_t *ftypes); JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type); -void register_eh_frames(uint8_t *Addr, size_t Size); -void deregister_eh_frames(uint8_t *Addr, size_t Size); +void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; +void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; -STATIC_INLINE void *jl_get_frame_addr(void) +STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT { #ifdef __GNUC__ return __builtin_frame_address(0); diff --git a/src/julia_locks.h b/src/julia_locks.h index 5774ddada60c6..4d1345177f965 100644 --- a/src/julia_locks.h +++ b/src/julia_locks.h @@ -103,6 +103,33 @@ JL_DLLEXPORT void jl_unlock_field(jl_mutex_t *v) JL_NOTSAFEPOINT; #ifdef __cplusplus } + +#include +#include +// simple C++ shim around a std::unique_lock + gc-safe + disabled finalizers region +// since we nearly always want that combination together +class jl_unique_gcsafe_lock { +public: + int8_t gc_state; + std::unique_lock native; + explicit jl_unique_gcsafe_lock(std::mutex &native) JL_NOTSAFEPOINT_ENTER + { + jl_task_t *ct = jl_current_task; + gc_state = jl_gc_safe_enter(ct->ptls); + this->native = std::unique_lock(native); + ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph + } + jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &&native) = delete; + jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &native) = delete; + ~jl_unique_gcsafe_lock() JL_NOTSAFEPOINT_LEAVE { + jl_task_t *ct = jl_current_task; + jl_gc_safe_leave(ct->ptls, gc_state); + ct->ptls->engine_nqueued--; // enable finalizers (but don't run them until the next gc) + } + void wait(std::condition_variable& cond) JL_NOTSAFEPOINT { + cond.wait(native); + } +}; #endif #endif diff --git a/src/opaque_closure.c b/src/opaque_closure.c index 0bf3a729cbcb1..9fe36f32d2030 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -80,14 +80,16 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t if (!jl_subtype(rt_lb, selected_rt)) { // TODO: It would be better to try to get a specialization with the // correct rt check here (or we could codegen a wrapper). - specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + specptr = NULL; // this will force codegen of the unspecialized version + invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; jl_value_t *ts[2] = {rt_lb, (jl_value_t*)ci->rettype}; selected_rt = jl_type_union(ts, 2); } if (!jl_subtype(ci->rettype, rt_ub)) { // TODO: It would be better to try to get a specialization with the // correct rt check here (or we could codegen a wrapper). - specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + specptr = NULL; // this will force codegen of the unspecialized version + invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; selected_rt = jl_type_intersection(rt_ub, selected_rt); } @@ -108,8 +110,7 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, selected_rt); JL_GC_PROMISE_ROOTED(oc_type); - if (!specptr) { - sigtype = jl_argtype_with_function_type((jl_value_t*)oc_type, (jl_value_t*)argt); + if (specptr == NULL) { jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec); // OC wrapper methods are not world dependent @@ -197,7 +198,7 @@ int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_CALLABLE(jl_f_opaque_closure_call) { - jl_opaque_closure_t* oc = (jl_opaque_closure_t*)F; + jl_opaque_closure_t *oc = (jl_opaque_closure_t*)F; jl_value_t *argt = jl_tparam0(jl_typeof(oc)); if (!jl_tupletype_length_compat(argt, nargs)) jl_method_error(F, args, nargs + 1, oc->world); diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 09d51598ea8b7..f8935070bb001 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -980,3 +980,9 @@ extern "C" JL_DLLEXPORT_CODEGEN ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT { return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks}; } + +void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) +{ + PM->add(new TargetLibraryInfoWrapperPass(triple)); + PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); +} diff --git a/src/stackwalk.c b/src/stackwalk.c index 6784e601bcfba..7c6f946fe73c5 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -642,13 +642,13 @@ void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT for (i = 0; i < n; i++) { jl_frame_t frame = frames[i]; if (!frame.func_name) { - jl_safe_printf("unknown function (ip: %p)\n", (void*)ip); + jl_safe_printf("unknown function (ip: %p) at %s\n", (void*)ip, frame.file_name ? frame.file_name : "(unknown file)"); } else { jl_safe_print_codeloc(frame.func_name, frame.file_name, frame.line, frame.inlined); free(frame.func_name); - free(frame.file_name); } + free(frame.file_name); } free(frames); } From aa51abe6400bc29c6093dca5a395fc03806ab511 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sat, 19 Oct 2024 01:20:59 -0400 Subject: [PATCH 14/47] rename: invalid -> incompatible cache header (#56240) Falling back to the older serial precompilation process is basically a bug (except for if a manifest hasn't been resolved) so https://github.com/JuliaLang/julia/pull/52619 added more info on why it's been hit so we have a chance of fixing issues that are otherwise very difficult to recreate. However "invalid header" which usually just means it was made by a different julia version appears to sound too alarming to users. https://discourse.julialang.org/t/cache-misses-when-using-packages-since-upgrading-to-1-11/121445 So soften it there and in error messages, given it seems a better description. Suggested by @giordano in https://discourse.julialang.org/t/cache-misses-when-using-packages-since-upgrading-to-1-11/121445/4?u=ianshmean --- base/loading.jl | 22 +++++++++++----------- stdlib/Logging/docs/src/index.md | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 78c584d00852b..db6a681bb2a5b 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1247,7 +1247,7 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No else io = open(path, "r") try - iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") + iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.") _, (includes, _, _), _, _, _, _, _, _ = parse_cache_header(io, path) ignore_native = pkg_tracked(includes) finally @@ -1887,7 +1887,7 @@ function isrelocatable(pkg::PkgId) isnothing(path) && return false io = open(path, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) _, (includes, includes_srcfiles, _), _... = _parse_cache_header(io, path) for inc in includes !startswith(inc.filename, "@depot") && return false @@ -1962,7 +1962,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union io = open(path, "r") ignore_native = false try - iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") + iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.") _, (includes, _, _), depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path) ignore_native = pkg_tracked(includes) @@ -3179,7 +3179,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # append extra crc to the end of the .ji file: open(tmppath, "r+") do f if iszero(isvalid_cache_header(f)) - error("Invalid header for $(repr("text/plain", pkg)) in new cache file $(repr(tmppath)).") + error("Incompatible header for $(repr("text/plain", pkg)) in new cache file $(repr(tmppath)).") end seekend(f) write(f, crc_so) @@ -3503,7 +3503,7 @@ end function parse_cache_header(cachefile::String) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) ret = parse_cache_header(io, cachefile) return ret finally @@ -3516,7 +3516,7 @@ function preferences_hash(cachefile::String) io = open(cachefile, "r") try if iszero(isvalid_cache_header(io)) - throw(ArgumentError("Invalid header in cache file $cachefile.")) + throw(ArgumentError("Incompatible header in cache file $cachefile.")) end return preferences_hash(io, cachefile) finally @@ -3532,7 +3532,7 @@ end function cache_dependencies(cachefile::String) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) return cache_dependencies(io, cachefile) finally close(io) @@ -3572,7 +3572,7 @@ end function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) return read_dependency_src(io, cachefile, filename) finally close(io) @@ -3856,9 +3856,9 @@ end try checksum = isvalid_cache_header(io) if iszero(checksum) - @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" - record_reason(reasons, "invalid header") - return true # invalid cache file + @debug "Rejecting cache file $cachefile due to it containing an incompatible cache header" + record_reason(reasons, "incompatible header") + return true # incompatible cache file end modules, (includes, _, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, actual_flags = parse_cache_header(io, cachefile) if isempty(modules) diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md index 17d4e71328ac4..a2bfd499e4586 100644 --- a/stdlib/Logging/docs/src/index.md +++ b/stdlib/Logging/docs/src/index.md @@ -191,10 +191,10 @@ module. Loading julia with `JULIA_DEBUG=loading` will activate ``` $ JULIA_DEBUG=loading julia -e 'using OhMyREPL' -┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an invalid cache header +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an incompatible cache header └ @ Base loading.jl:1328 [ Info: Recompiling stale cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji for module OhMyREPL -┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an invalid cache header +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an incompatible cache header └ @ Base loading.jl:1328 ... ``` From b0c1525f1731186767ae42e7d625bf0909e49af8 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Sat, 19 Oct 2024 09:27:56 +0100 Subject: [PATCH 15/47] Restore support for checking for UndefVarError variable name in at-test_throws (#56231) Fix https://github.com/JuliaLang/julia/issues/54082 Arguably this was a breaking change (as a consequence of https://github.com/JuliaLang/julia/pull/51979). But regardless, it seems like useful functionality to have a public API for testing that an `UndefVarError` was thrown for the expected variable name (regardless of scope). This is particularly useful if you don't know what the scope is (for example, in my use-case i want to test that a specific `UndefVarError` is thrown from a module with a `gensym`'d name). Pre-v1.11 the syntax for this was ```julia @test_throws UndefVarError(:x) foo() ``` but that stopped working in v1.11 when `UndefVarError` got a second field (in fact in v1.11.1 this is an error when before it would pass) This PR restores that functionality. We might want to backport it to v1.11.x so that v1.11 isn't the only version that doesn't support this. --- stdlib/Test/src/Test.jl | 6 +++++- stdlib/Test/test/runtests.jl | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 46bc2d8790cec..cf906591b9962 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -812,7 +812,11 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype) if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc) extype = extype.error # deprecated end - if isa(exc, typeof(extype)) + # Support `UndefVarError(:x)` meaning `UndefVarError(:x, scope)` for any `scope`. + # Retains the behaviour from pre-v1.11 when `UndefVarError` didn't have `scope`. + if isa(extype, UndefVarError) && !isdefined(extype, :scope) + success = exc isa UndefVarError && exc.var == extype.var + else isa(exc, typeof(extype)) success = true for fld in 1:nfields(extype) if !isequal(getfield(extype, fld), getfield(exc, fld)) diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl index 3ddcd7d5de0fd..0c08f78ef356f 100644 --- a/stdlib/Test/test/runtests.jl +++ b/stdlib/Test/test/runtests.jl @@ -1736,3 +1736,20 @@ end This is deprecated and may error in the future.""" @test_deprecated msg2 @macroexpand @testset DefaultTestSet DefaultTestSet begin end end + +# Issue #54082 +module M54082 end +@testset "@test_throws UndefVarError(:var)" begin + # Single-arg `UndefVarError` should match all `UndefVarError` for the + # same variable name, regardless of scope, to keep pre-v1.11 behaviour. + f54082() = var + @test_throws UndefVarError(:var) f54082() + # But if scope is set, then it has to match. + @test_throws UndefVarError(:var, M54082) M54082.var + let result = @testset NoThrowTestSet begin + # Wrong module scope + @test_throws UndefVarError(:var, Main) M54082.var + end + @test only(result) isa Test.Fail + end +end From 877de9839809e769d4f9707a61df3400d087d8d3 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Sun, 20 Oct 2024 00:46:03 +1100 Subject: [PATCH 16/47] Refactoring to be considered before adding MMTk (#55608) This PR contains some refactoring of common functions that were moved to `gc-common.c` and should be shared between MMTk and Julia's stock GC. --- src/Makefile | 2 +- src/gc-common.c | 205 +++++++++++++++++++++++++++ src/gc-common.h | 12 ++ src/gc-debug.c | 42 ------ src/gc-interface.h | 37 ++--- src/gc-stacks.c | 22 +-- src/gc-stock.c | 325 ++++++++++++++----------------------------- src/gc-stock.h | 6 - src/gc-tls-common.h | 52 +++++++ src/gc-tls.h | 25 ---- src/julia_internal.h | 26 +--- src/julia_threads.h | 2 + src/scheduler.c | 4 - src/stackwalk.c | 12 +- 14 files changed, 402 insertions(+), 370 deletions(-) create mode 100644 src/gc-tls-common.h diff --git a/src/Makefile b/src/Makefile index 75635c2e6c062..3458f51fa5548 100644 --- a/src/Makefile +++ b/src/Makefile @@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h) endif diff --git a/src/gc-common.c b/src/gc-common.c index ee461b576ea9e..b552afb8228f0 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -20,6 +20,11 @@ extern "C" { jl_gc_num_t gc_num = {0}; +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + // =========================================================================== // // GC Callbacks // =========================================================================== // @@ -485,10 +490,210 @@ JL_DLLEXPORT void jl_finalize(jl_value_t *o) int gc_n_threads; jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Allocation +// =========================================================================== // + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. + +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + +// =========================================================================== // +// Generic Memory +// =========================================================================== // + +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. + mallocmemory_t *ma; + if (ptls->gc_tls_common.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls_common.heap.mafreelist; + ptls->gc_tls_common.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls_common.heap.mallocarrays; + ptls->gc_tls_common.heap.mallocarrays = ma; +} + +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = module_usings_length(m); + elsize = sizeof(struct _jl_module_using); + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// =========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + // =========================================================================== // // MISC // =========================================================================== // +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..32b7470b13a58 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. +typedef struct _mallocmemory_t { + jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory + struct _mallocmemory_t *next; +} mallocmemory_t; + #if defined(_OS_WINDOWS_) STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { @@ -173,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + #endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index 5c150aba68e10..7c479484cde45 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,48 +1105,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = module_usings_length(m); - elsize = sizeof(struct _jl_module_using); - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index bb2abbe2d36ac..0b5df17a3b8c5 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -96,6 +96,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -130,6 +132,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); // Allocation // ========================================================================= // +// On GCC, this function is inlined when sz is constant (see julia_internal.h) +// In general, this function should implement allocation and should use the specific GC's logic +// to decide whether to allocate a small or a large object. Finally, note that this function +// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record +// an allocation of that type in the allocation profiler. +struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); + // Allocates small objects and increments Julia allocation counterst. Size of the object // header must be included in the object size. The (possibly unused in some implementations) // offset to the arena in which we're allocating is passed in the second parameter, and the @@ -157,26 +166,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. -JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -190,14 +179,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. The third and fourth diff --git a/src/gc-stacks.c b/src/gc-stacks.c index f6e787a4c1d2d..a2d3862dc9501 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -47,7 +47,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -82,7 +82,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -132,7 +132,7 @@ void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); return; } } @@ -161,7 +161,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); } } } @@ -176,7 +176,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id]; if (pool->len > 0) { stk = small_arraylist_pop(pool); } @@ -197,7 +197,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz = ssize; if (owner) { - small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks; mtarraylist_push(live_tasks, owner); } return stk; @@ -228,7 +228,7 @@ void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT // free half of stacks that remain unused since last sweep if (i == jl_atomic_load_relaxed(&gc_stack_free_idx)) { for (int p = 0; p < JL_N_STACK_POOLS; p++) { - small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p]; + small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; size_t n_to_free; if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { n_to_free = al->len; // not alive yet or dead, so it does not need these anymore @@ -251,10 +251,10 @@ void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT } } if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { - small_arraylist_free(ptls2->gc_tls.heap.free_stacks); + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = 0; size_t ndel = 0; size_t l = live_tasks->len; @@ -306,7 +306,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) jl_ptls_t ptls2 = allstates[i]; if (ptls2 == NULL) continue; - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); l += n + (ptls2->root_task->ctx.stkbuf != NULL); } @@ -325,7 +325,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) goto restart; jl_array_data(a,void*)[j++] = t; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); for (size_t i = 0; i < n; i++) { jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); diff --git a/src/gc-stock.c b/src/gc-stock.c index f60aa89e6b11d..541c5b4ecc5c2 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -363,7 +363,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr); + small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr); return wr; } @@ -373,8 +373,8 @@ static void clear_weak_refs(void) for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { - size_t n, l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; for (n = 0; n < l; n++) { jl_weakref_t *wr = (jl_weakref_t*)lst[n]; if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) @@ -392,8 +392,8 @@ static void sweep_weak_refs(void) if (ptls2 != NULL) { size_t n = 0; size_t ndel = 0; - size_t l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; if (l == 0) continue; while (1) { @@ -408,7 +408,7 @@ static void sweep_weak_refs(void) lst[n] = lst[n + ndel]; lst[n + ndel] = tmp; } - ptls2->gc_tls.heap.weak_refs.len -= ndel; + ptls2->gc_tls_common.heap.weak_refs.len -= ndel; } } } @@ -416,18 +416,18 @@ static void sweep_weak_refs(void) STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz; if (alloc_acc < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc); else { jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); } } STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz); } // big value list @@ -448,10 +448,10 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); @@ -561,29 +561,11 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. - mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); jl_batch_accum_heap_size(ptls, sz); } @@ -602,18 +584,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc); - freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -629,13 +611,13 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -655,17 +637,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -689,8 +660,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays; - mallocmemory_t **pma = &ptls2->gc_tls.heap.mallocarrays; + mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays; + mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays; while (ma != NULL) { mallocmemory_t *nxt = ma->next; jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1); @@ -702,8 +673,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT *pma = nxt; int isaligned = (uintptr_t)ma->a & 1; jl_gc_free_memory(a, isaligned); - ma->next = ptls2->gc_tls.heap.mafreelist; - ptls2->gc_tls.heap.mafreelist = ma; + ma->next = ptls2->gc_tls_common.heap.mafreelist; + ptls2->gc_tls_common.heap.mafreelist = ma; } gc_time_count_mallocd_memory(bits); ma = nxt; @@ -764,12 +735,12 @@ STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset, return jl_gc_big_alloc(ptls, osize, NULL); #endif maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v != NULL) { @@ -824,6 +795,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) + v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -983,8 +977,8 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ // instead of adding it to the thread that originally allocated the page, so we can avoid // an atomic-fetch-add here. size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta); jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } @@ -1277,7 +1271,7 @@ static void gc_sweep_pool(void) } continue; } - jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; @@ -2841,34 +2835,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } - } - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -2879,11 +2847,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -2918,7 +2881,7 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) for (int i = 0; i < n_threads; i++) { jl_ptls_t ptls2 = all_tls_states[i]; if (ptls2 != NULL) { - pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes); } } return pool_live_bytes; @@ -3271,13 +3234,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); + jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls2->gc_tls.heap; - if (heap->weak_refs.len == 0) - small_arraylist_free(&heap->weak_refs); - if (heap->live_tasks.len == 0) - small_arraylist_free(&heap->live_tasks); + if (common_heap->weak_refs.len == 0) + small_arraylist_free(&common_heap->weak_refs); + if (common_heap->live_tasks.len == 0) + small_arraylist_free(&common_heap->live_tasks); if (heap->remset.len == 0) arraylist_free(&heap->remset); if (ptls2->finalizers.len == 0) @@ -3346,8 +3309,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -3449,16 +3412,10 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { + jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { @@ -3466,12 +3423,12 @@ void jl_init_thread_heap(jl_ptls_t ptls) p[i].freelist = NULL; p[i].newpages = NULL; } - small_arraylist_new(&heap->weak_refs, 0); - small_arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&common_heap->weak_refs, 0); + small_arraylist_new(&common_heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) - small_arraylist_new(&heap->free_stacks[i], 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; + small_arraylist_new(&common_heap->free_stacks[i], 0); + common_heap->mallocarrays = NULL; + common_heap->mafreelist = NULL; heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; @@ -3497,8 +3454,8 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&q->array, wsa2); arraylist_new(&mq->reclaim_set, 32); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); } void jl_free_thread_gc_state(jl_ptls_t ptls) @@ -3685,10 +3642,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz); } return data; @@ -3702,10 +3659,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz * nm); } return data; @@ -3730,10 +3687,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (!(sz < old)) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1); int64_t diff = sz - old; if (diff < 0) { @@ -3746,63 +3703,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. - -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3821,10 +3721,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); @@ -3936,18 +3836,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4075,11 +3963,6 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc(ptls, sz, ty); -} - JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); diff --git a/src/gc-stock.h b/src/gc-stock.h index 76cecf68067bf..b9a2e720f120a 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. -typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h new file mode 100644 index 0000000000000..ba36f5c1c238e --- /dev/null +++ b/src/gc-tls-common.h @@ -0,0 +1,52 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_COMMON_H +#define JL_GC_TLS_COMMON_H + +#include "julia_atomics.h" + +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_common_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_common_t; + +typedef struct { + jl_thread_heap_common_t heap; + jl_thread_gc_num_common_t gc_num; +} jl_gc_tls_states_common_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_COMMON_H diff --git a/src/gc-tls.h b/src/gc-tls.h index 3c2cc029a6183..d82506383c501 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -21,16 +21,6 @@ typedef struct { } jl_gc_pool_t; typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects struct _bigval_t *young_generation_of_bigvals; @@ -42,22 +32,8 @@ typedef struct { // variables for allocating objects from pools #define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h` jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; } jl_thread_heap_t; -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { ws_queue_t chunk_queue; ws_queue_t ptr_queue; @@ -78,7 +54,6 @@ typedef struct { typedef struct { jl_thread_heap_t heap; jl_gc_page_stack_t page_metadata_allocd; - jl_thread_gc_num_t gc_num; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; _Atomic(size_t) gc_sweeps_requested; diff --git a/src/julia_internal.h b/src/julia_internal.h index bb8169c6e5f9e..4f735029da444 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -372,6 +372,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -518,30 +520,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of diff --git a/src/julia_threads.h b/src/julia_threads.h index 17e8d7d466044..67da2978b4267 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -5,6 +5,7 @@ #define JL_THREADS_H #include "gc-tls.h" +#include "gc-tls-common.h" #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -155,6 +156,7 @@ typedef struct _jl_tls_states_t { // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) diff --git a/src/stackwalk.c b/src/stackwalk.c index 7c6f946fe73c5..770daa8bf17a6 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -1340,18 +1340,14 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { continue; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); int t_state = JL_TASK_STATE_DONE; jl_task_t *t = ptls2->root_task; From e08280a24fba1b5f1ecf5dce7d8b974d880dae5a Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Sat, 19 Oct 2024 21:04:19 -0400 Subject: [PATCH 17/47] Few more tests for AbstractChar (#56249) --- test/char.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/char.jl b/test/char.jl index 3100add0e81c5..5523125529031 100644 --- a/test/char.jl +++ b/test/char.jl @@ -288,6 +288,10 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c) @test string(ASCIIChar('x')) == "x" @test length(ASCIIChar('x')) == 1 @test !isempty(ASCIIChar('x')) + @test ndims(ASCIIChar('x')) == 0 + @test ndims(ASCIIChar) == 0 + @test firstindex(ASCIIChar('x')) == 1 + @test lastindex(ASCIIChar('x')) == 1 @test eltype(ASCIIChar) == ASCIIChar @test_throws MethodError write(IOBuffer(), ASCIIChar('x')) @test_throws MethodError read(IOBuffer('x'), ASCIIChar) From 1fd7ada972911c181750d104604487a35ae3bea9 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sun, 20 Oct 2024 13:12:44 -0400 Subject: [PATCH 18/47] REPL: run repl hint generation for modeswitch chars when not switching (#56251) Fixes https://github.com/JuliaLang/julia/issues/56003 --- stdlib/REPL/src/REPL.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index b8f850c3e9ff9..ac791327e2d75 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -1431,6 +1431,7 @@ function setup_interface( end else edit_insert(s, ';') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, '?' => function (s::MIState,o...) @@ -1441,6 +1442,7 @@ function setup_interface( end else edit_insert(s, '?') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, ']' => function (s::MIState,o...) @@ -1477,6 +1479,7 @@ function setup_interface( Base.errormonitor(t_replswitch) else edit_insert(s, ']') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, From a4a4b954450467d40e6d4dc22b17269da1eb5337 Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Sun, 20 Oct 2024 21:01:52 -0400 Subject: [PATCH 19/47] =?UTF-8?q?=F0=9F=A4=96=20[master]=20Bump=20the=20Pk?= =?UTF-8?q?g=20stdlib=20from=2027c1b1ee5=20to=20799dc2d54=20(#56257)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stdlib: Pkg URL: https://github.com/JuliaLang/Pkg.jl.git Stdlib branch: master Julia branch: master Old commit: 27c1b1ee5 New commit: 799dc2d54 Julia version: 1.12.0-DEV Pkg version: 1.12.0 Bump invoked by: @IanButterworth Powered by: [BumpStdlibs.jl](https://github.com/JuliaLang/BumpStdlibs.jl) Diff: https://github.com/JuliaLang/Pkg.jl/compare/27c1b1ee5cf15571eb5e54707e812d646ac1dde3...799dc2d54c4e809b9779de8c604564a5b3befaa0 ``` $ git log --oneline 27c1b1ee5..799dc2d54 799dc2d54 REPLExt: use Base.isaccessibledir rather than isdir in completions (#4053) 3fde94ee9 REPLExt: run repl hint generation for modeswitch chars when not switching (#4054) ``` Co-authored-by: Dilum Aluthge --- .../Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 | 1 - .../Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 | 1 - .../Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 | 1 + .../Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 | 1 + stdlib/Pkg.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 delete mode 100644 deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 create mode 100644 deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 create mode 100644 deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 diff --git a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 b/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 deleted file mode 100644 index 137460d1a05a1..0000000000000 --- a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -74d656c054c1406a7e88910d673019f7 diff --git a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 b/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 deleted file mode 100644 index 0b8463176a867..0000000000000 --- a/deps/checksums/Pkg-27c1b1ee5cf15571eb5e54707e812d646ac1dde3.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -a8e589ce68cc14883a7a21f68862695bfaa9ab38dfa0e704c32aaa801667708af0d851a41199ad09ae81a4c0b928befb680d639c1eca3377ce2db2dcc34b98e5 diff --git a/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 new file mode 100644 index 0000000000000..7c0bfbf62bd6e --- /dev/null +++ b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/md5 @@ -0,0 +1 @@ +6fce8506a1701acdcbc4888250eeb86a diff --git a/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 new file mode 100644 index 0000000000000..06e3ea9c8dfa7 --- /dev/null +++ b/deps/checksums/Pkg-799dc2d54c4e809b9779de8c604564a5b3befaa0.tar.gz/sha512 @@ -0,0 +1 @@ +e251745da221a82f3ec5e21a76c29df0b695dc4028ee2c719373c08637050318db7b543c9d40074314fc3495738d39fd8af5a7954e8b72695df44e25e395f883 diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 470acefbc6c83..c29c83fce4046 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 27c1b1ee5cf15571eb5e54707e812d646ac1dde3 +PKG_SHA1 = 799dc2d54c4e809b9779de8c604564a5b3befaa0 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 From fee8090fa86fb0529ae59fe3e3d339c60e2b90a8 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 21 Oct 2024 02:45:47 -0300 Subject: [PATCH 20/47] Make isbitstypes use memmove instead of the runtime function in copyto! (#56237) This might help llvm understand whats going on. Also enzyme really wants this to look like this to trace through it better. --------- Co-authored-by: Jameson Nash --- base/genericmemory.jl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/base/genericmemory.jl b/base/genericmemory.jl index 89861444d9652..de1fc668333f5 100644 --- a/base/genericmemory.jl +++ b/base/genericmemory.jl @@ -118,7 +118,17 @@ function unsafe_copyto!(dest::MemoryRef{T}, src::MemoryRef{T}, n) where {T} @_terminates_globally_notaskstate_meta n == 0 && return dest @boundscheck memoryref(dest, n), memoryref(src, n) - ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n)) + if isbitstype(T) + tdest = @_gc_preserve_begin dest + tsrc = @_gc_preserve_begin src + pdest = unsafe_convert(Ptr{Cvoid}, dest) + psrc = unsafe_convert(Ptr{Cvoid}, src) + memmove(pdest, psrc, aligned_sizeof(T) * n) + @_gc_preserve_end tdest + @_gc_preserve_end tsrc + else + ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n)) + end return dest end From b01095e0274bf078e162379a9f243197710053ff Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Mon, 21 Oct 2024 11:28:56 +0530 Subject: [PATCH 21/47] Fix kron indexing for types without a unique zero (#56229) This fixes a bug introduced in https://github.com/JuliaLang/julia/pull/55941. We may also take this opportunity to limit the scope of the `@inbounds` annotations, and also use `axes` to compute the bounds instead of hard-coding them. The real "fix" here is on line 767, where `l in 1:nA` should have been `l in 1:mB`. Using `axes` avoids such errors, and makes the operation safer as well. --- stdlib/LinearAlgebra/src/diagonal.jl | 50 +++++++++++++-------------- stdlib/LinearAlgebra/test/diagonal.jl | 4 +-- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 17ff232f5b262..6e8ce96259fc1 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -700,16 +700,16 @@ end zerofilled = true end end - @inbounds for i = 1:nA, j = 1:nB + for i in eachindex(valA), j in eachindex(valB) idx = (i-1)*nB+j - C[idx, idx] = valA[i] * valB[j] + @inbounds C[idx, idx] = valA[i] * valB[j] end if !zerofilled - for j in 1:nA, i in 1:mA + for j in axes(A,2), i in axes(A,1) Δrow, Δcol = (i-1)*mB, (j-1)*nB - for k in 1:nB, l in 1:mB + for k in axes(B,2), l in axes(B,1) i == j && k == l && continue - C[Δrow + l, Δcol + k] = A[i,j] * B[l,k] + @inbounds C[Δrow + l, Δcol + k] = A[i,j] * B[l,k] end end end @@ -749,24 +749,24 @@ end end end m = 1 - @inbounds for j = 1:nA - A_jj = A[j,j] - for k = 1:nB - for l = 1:mB - C[m] = A_jj * B[l,k] + for j in axes(A,2) + A_jj = @inbounds A[j,j] + for k in axes(B,2) + for l in axes(B,1) + @inbounds C[m] = A_jj * B[l,k] m += 1 end m += (nA - 1) * mB end if !zerofilled # populate the zero elements - for i in 1:mA + for i in axes(A,1) i == j && continue - A_ij = A[i, j] + A_ij = @inbounds A[i, j] Δrow, Δcol = (i-1)*mB, (j-1)*nB - for k in 1:nB, l in 1:nA - B_lk = B[l, k] - C[Δrow + l, Δcol + k] = A_ij * B_lk + for k in axes(B,2), l in axes(B,1) + B_lk = @inbounds B[l, k] + @inbounds C[Δrow + l, Δcol + k] = A_ij * B_lk end end end @@ -792,23 +792,23 @@ end end end m = 1 - @inbounds for j = 1:nA - for l = 1:mB - Bll = B[l,l] - for i = 1:mA - C[m] = A[i,j] * Bll + for j in axes(A,2) + for l in axes(B,1) + Bll = @inbounds B[l,l] + for i in axes(A,1) + @inbounds C[m] = A[i,j] * Bll m += nB end m += 1 end if !zerofilled - for i in 1:mA - A_ij = A[i, j] + for i in axes(A,1) + A_ij = @inbounds A[i, j] Δrow, Δcol = (i-1)*mB, (j-1)*nB - for k in 1:nB, l in 1:mB + for k in axes(B,2), l in axes(B,1) l == k && continue - B_lk = B[l, k] - C[Δrow + l, Δcol + k] = A_ij * B_lk + B_lk = @inbounds B[l, k] + @inbounds C[Δrow + l, Δcol + k] = A_ij * B_lk end end end diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index 8b56ee15e56e3..1c3a9dfa676ac 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -353,7 +353,7 @@ Random.seed!(1) D3 = Diagonal(convert(Vector{elty}, rand(n÷2))) DM3= Matrix(D3) @test Matrix(kron(D, D3)) ≈ kron(DM, DM3) - M4 = rand(elty, n÷2, n÷2) + M4 = rand(elty, size(D3,1) + 1, size(D3,2) + 2) # choose a different size from D3 @test kron(D3, M4) ≈ kron(DM3, M4) @test kron(M4, D3) ≈ kron(M4, DM3) X = [ones(1,1) for i in 1:2, j in 1:2] @@ -1392,7 +1392,7 @@ end end @testset "zeros in kron with block matrices" begin - D = Diagonal(1:2) + D = Diagonal(1:4) B = reshape([ones(2,2), ones(3,2), ones(2,3), ones(3,3)], 2, 2) @test kron(D, B) == kron(Array(D), B) @test kron(B, D) == kron(B, Array(D)) From 04259daf8a339f99d7cd8503d9c1f154b247e4e1 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Mon, 21 Oct 2024 11:31:36 +0530 Subject: [PATCH 22/47] Reroute` (Upper/Lower)Triangular * Diagonal` through `__muldiag` (#55984) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, `::Diagonal * ::AbstractMatrix` calls the method `LinearAlgebra.__muldiag!` in general that scales the rows, and similarly for the diagonal on the right. The implementation of `__muldiag` was duplicating the logic in `LinearAlgebra.modify!` and the methods for `MulAddMul`. This PR replaces the various branches with calls to `modify!` instead. I've also extracted the multiplication logic into its own function `__muldiag_nonzeroalpha!` so that this may be specialized for matrix types, such as triangular ones. Secondly, `::Diagonal * ::UpperTriangular` (and similarly, other triangular matrices) was specialized to forward the multiplication to the parent of the triangular. For strided matrices, however, it makes more sense to use the structure and scale only the filled half of the matrix. Firstly, this improves performance, and secondly, this avoids errors in case the parent isn't fully initialized corresponding to the structural zero elements. Performance improvement: ```julia julia> D = Diagonal(1:400); julia> U = UpperTriangular(zeros(size(D))); julia> @btime $D * $U; 314.944 μs (3 allocations: 1.22 MiB) # v"1.12.0-DEV.1288" 195.960 μs (3 allocations: 1.22 MiB) # This PR ``` Fix: ```julia julia> M = Matrix{BigFloat}(undef, 2, 2); julia> M[1,1] = M[2,2] = M[1,2] = 3; julia> U = UpperTriangular(M) 2×2 UpperTriangular{BigFloat, Matrix{BigFloat}}: 3.0 3.0 ⋅ 3.0 julia> D = Diagonal(1:2); julia> U * D # works after this PR 2×2 UpperTriangular{BigFloat, Matrix{BigFloat}}: 3.0 6.0 ⋅ 6.0 ``` --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 2 + stdlib/LinearAlgebra/src/diagonal.jl | 158 +++++++++++++--------- stdlib/LinearAlgebra/test/diagonal.jl | 40 +++++- 3 files changed, 134 insertions(+), 66 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index 15354603943c2..88fc3476c9d7f 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -655,6 +655,8 @@ matprod_dest(A::StructuredMatrix, B::Diagonal, TS) = _matprod_dest_diag(A, TS) matprod_dest(A::Diagonal, B::StructuredMatrix, TS) = _matprod_dest_diag(B, TS) matprod_dest(A::Diagonal, B::Diagonal, TS) = _matprod_dest_diag(B, TS) _matprod_dest_diag(A, TS) = similar(A, TS) +_matprod_dest_diag(A::UnitUpperTriangular, TS) = UpperTriangular(similar(parent(A), TS)) +_matprod_dest_diag(A::UnitLowerTriangular, TS) = LowerTriangular(similar(parent(A), TS)) function _matprod_dest_diag(A::SymTridiagonal, TS) n = size(A, 1) ev = similar(A, TS, max(0, n-1)) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 6e8ce96259fc1..8ba4c3d457e83 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -396,82 +396,120 @@ function lmul!(D::Diagonal, T::Tridiagonal) return T end -function __muldiag!(out, D::Diagonal, B, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} +@inline function __muldiag_nonzeroalpha!(out, D::Diagonal, B, _add::MulAddMul) + @inbounds for j in axes(B, 2) + @simd for i in axes(B, 1) + _modify!(_add, D.diag[i] * B[i,j], out, (i,j)) + end + end + out +end +_maybe_unwrap_tri(out, A) = out, A +_maybe_unwrap_tri(out::UpperTriangular, A::UpperOrUnitUpperTriangular) = parent(out), parent(A) +_maybe_unwrap_tri(out::LowerTriangular, A::LowerOrUnitLowerTriangular) = parent(out), parent(A) +@inline function __muldiag_nonzeroalpha!(out, D::Diagonal, B::UpperOrLowerTriangular, _add::MulAddMul) + isunit = B isa Union{UnitUpperTriangular, UnitLowerTriangular} + # if both B and out have the same upper/lower triangular structure, + # we may directly read and write from the parents + out_maybeparent, B_maybeparent = _maybe_unwrap_tri(out, B) + for j in axes(B, 2) + if isunit + _modify!(_add, D.diag[j] * B[j,j], out, (j,j)) + end + rowrange = B isa UpperOrUnitUpperTriangular ? (1:min(j-isunit, size(B,1))) : (j+isunit:size(B,1)) + @inbounds @simd for i in rowrange + _modify!(_add, D.diag[i] * B_maybeparent[i,j], out_maybeparent, (i,j)) + end + end + out +end +function __muldiag!(out, D::Diagonal, B, _add::MulAddMul) require_one_based_indexing(out, B) alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out, beta) else - if bis0 - @inbounds for j in axes(B, 2) - @simd for i in axes(B, 1) - out[i,j] = D.diag[i] * B[i,j] * alpha - end - end - else - @inbounds for j in axes(B, 2) - @simd for i in axes(B, 1) - out[i,j] = D.diag[i] * B[i,j] * alpha + out[i,j] * beta - end - end - end + __muldiag_nonzeroalpha!(out, D, B, _add) end return out end -function __muldiag!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + +@inline function __muldiag_nonzeroalpha!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + beta = _add.beta + _add_aisone = MulAddMul{true,bis0,Bool,typeof(beta)}(true, beta) + @inbounds for j in axes(A, 2) + dja = _add(D.diag[j]) + @simd for i in axes(A, 1) + _modify!(_add_aisone, A[i,j] * dja, out, (i,j)) + end + end + out +end +@inline function __muldiag_nonzeroalpha!(out, A::UpperOrLowerTriangular, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + isunit = A isa Union{UnitUpperTriangular, UnitLowerTriangular} + beta = _add.beta + # since alpha is multiplied to the diagonal element of D, + # we may skip alpha in the second multiplication by setting ais1 to true + _add_aisone = MulAddMul{true,bis0,Bool,typeof(beta)}(true, beta) + # if both A and out have the same upper/lower triangular structure, + # we may directly read and write from the parents + out_maybeparent, A_maybeparent = _maybe_unwrap_tri(out, A) + @inbounds for j in axes(A, 2) + dja = _add(D.diag[j]) + if isunit + _modify!(_add_aisone, A[j,j] * dja, out, (j,j)) + end + rowrange = A isa UpperOrUnitUpperTriangular ? (1:min(j-isunit, size(A,1))) : (j+isunit:size(A,1)) + @simd for i in rowrange + _modify!(_add_aisone, A_maybeparent[i,j] * dja, out_maybeparent, (i,j)) + end + end + out +end +function __muldiag!(out, A, D::Diagonal, _add::MulAddMul) require_one_based_indexing(out, A) alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out, beta) else - if bis0 - @inbounds for j in axes(A, 2) - dja = D.diag[j] * alpha - @simd for i in axes(A, 1) - out[i,j] = A[i,j] * dja - end - end - else - @inbounds for j in axes(A, 2) - dja = D.diag[j] * alpha - @simd for i in axes(A, 1) - out[i,j] = A[i,j] * dja + out[i,j] * beta - end - end - end + __muldiag_nonzeroalpha!(out, A, D, _add) end return out end -function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + +@inline function __muldiag_nonzeroalpha!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul) d1 = D1.diag d2 = D2.diag + outd = out.diag + @inbounds @simd for i in eachindex(d1, d2, outd) + _modify!(_add, d1[i] * d2[i], outd, i) + end + out +end +function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul) alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out.diag, beta) else - if bis0 - @inbounds @simd for i in eachindex(out.diag) - out.diag[i] = d1[i] * d2[i] * alpha - end - else - @inbounds @simd for i in eachindex(out.diag) - out.diag[i] = d1[i] * d2[i] * alpha + out.diag[i] * beta - end - end + __muldiag_nonzeroalpha!(out, D1, D2, _add) end return out end -function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - require_one_based_indexing(out) - alpha, beta = _add.alpha, _add.beta - mA = size(D1, 1) +@inline function __muldiag_nonzeroalpha!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul) d1 = D1.diag d2 = D2.diag + @inbounds @simd for i in eachindex(d1, d2) + _modify!(_add, d1[i] * d2[i], out, (i,i)) + end + out +end +function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1}) where {ais1} + require_one_based_indexing(out) + alpha, beta = _add.alpha, _add.beta _rmul_or_fill!(out, beta) if !iszero(alpha) - @inbounds @simd for i in 1:mA - out[i,i] += d1[i] * d2[i] * alpha - end + _add_bis1 = MulAddMul{ais1,false,typeof(alpha),Bool}(alpha,true) + __muldiag_nonzeroalpha!(out, D1, D2, _add_bis1) end return out end @@ -658,31 +696,21 @@ for Tri in (:UpperTriangular, :LowerTriangular) @eval $fun(A::$Tri, D::Diagonal) = $Tri($fun(A.data, D)) @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($fun(A.data, D), $f, D.diag)) end + @eval *(A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = + @invoke *(A::AbstractMatrix, D::Diagonal) + @eval *(A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = + @invoke *(A::AbstractMatrix, D::Diagonal) for (fun, f) in zip((:*, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv)) @eval $fun(D::Diagonal, A::$Tri) = $Tri($fun(D, A.data)) @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag)) end + @eval *(D::Diagonal, A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}) = + @invoke *(D::Diagonal, A::AbstractMatrix) + @eval *(D::Diagonal, A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}) = + @invoke *(D::Diagonal, A::AbstractMatrix) # 3-arg ldiv! @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data)) @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag)) - # 3-arg mul! is disambiguated in special.jl - # 5-arg mul! - @eval _mul!(C::$Tri, D::Diagonal, A::$Tri, _add) = $Tri(mul!(C.data, D, A.data, _add.alpha, _add.beta)) - @eval function _mul!(C::$Tri, D::Diagonal, A::$UTri, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - α, β = _add.alpha, _add.beta - iszero(α) && return _rmul_or_fill!(C, β) - diag′ = bis0 ? nothing : diag(C) - data = mul!(C.data, D, A.data, α, β) - $Tri(_setdiag!(data, _add, D.diag, diag′)) - end - @eval _mul!(C::$Tri, A::$Tri, D::Diagonal, _add) = $Tri(mul!(C.data, A.data, D, _add.alpha, _add.beta)) - @eval function _mul!(C::$Tri, A::$UTri, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - α, β = _add.alpha, _add.beta - iszero(α) && return _rmul_or_fill!(C, β) - diag′ = bis0 ? nothing : diag(C) - data = mul!(C.data, A.data, D, α, β) - $Tri(_setdiag!(data, _add, D.diag, diag′)) - end end @inline function kron!(C::AbstractMatrix, A::Diagonal, B::Diagonal) diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index 1c3a9dfa676ac..380a0465028d1 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -1188,7 +1188,7 @@ end @test oneunit(D3) isa typeof(D3) end -@testset "AbstractTriangular" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular)) +@testset "$Tri" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular)) A = randn(4, 4) TriA = Tri(A) UTriA = UTri(A) @@ -1218,6 +1218,44 @@ end @test outTri === mul!(outTri, D, UTriA, 2, 1)::Tri == mul!(out, D, Matrix(UTriA), 2, 1) @test outTri === mul!(outTri, TriA, D, 2, 1)::Tri == mul!(out, Matrix(TriA), D, 2, 1) @test outTri === mul!(outTri, UTriA, D, 2, 1)::Tri == mul!(out, Matrix(UTriA), D, 2, 1) + + # we may write to a Unit triangular if the diagonal is preserved + ID = Diagonal(ones(size(UTriA,2))) + @test mul!(copy(UTriA), UTriA, ID) == UTriA + @test mul!(copy(UTriA), ID, UTriA) == UTriA + + @testset "partly filled parents" begin + M = Matrix{BigFloat}(undef, 2, 2) + M[1,1] = M[2,2] = 3 + isupper = Tri == UpperTriangular + M[1+!isupper, 1+isupper] = 3 + D = Diagonal(1:2) + T = Tri(M) + TA = Array(T) + @test T * D == TA * D + @test D * T == D * TA + @test mul!(copy(T), T, D, 2, 3) == 2T * D + 3T + @test mul!(copy(T), D, T, 2, 3) == 2D * T + 3T + + U = UTri(M) + UA = Array(U) + @test U * D == UA * D + @test D * U == D * UA + @test mul!(copy(T), U, D, 2, 3) == 2 * UA * D + 3TA + @test mul!(copy(T), D, U, 2, 3) == 2 * D * UA + 3TA + + M2 = Matrix{BigFloat}(undef, 2, 2) + M2[1+!isupper, 1+isupper] = 3 + U = UTri(M2) + UA = Array(U) + @test U * D == UA * D + @test D * U == D * UA + ID = Diagonal(ones(size(U,2))) + @test mul!(copy(U), U, ID) == U + @test mul!(copy(U), ID, U) == U + @test mul!(copy(U), U, ID, 2, -1) == U + @test mul!(copy(U), ID, U, 2, -1) == U + end end struct SMatrix1{T} <: AbstractArray{T,2} From e3f2f6b9c69293da68c1019366f8397d2049c6d8 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 18 Oct 2024 14:42:23 +0200 Subject: [PATCH 23/47] Revert "Extensions: make loading of extensions independent of what packages are in the sysimage (#52841)" This reverts commit 08d229f4a7cb0c3ef5becddd1b3bc4f8f178b8e4. --- base/Base.jl | 1 - base/loading.jl | 9 +++------ test/loading.jl | 19 ------------------- .../HasDepWithExtensions.jl/Manifest.toml | 7 +------ .../Extensions/HasExtensions.jl/Project.toml | 2 -- .../HasExtensions.jl/ext/LinearAlgebraExt.jl | 3 --- 6 files changed, 4 insertions(+), 37 deletions(-) delete mode 100644 test/project/Extensions/HasExtensions.jl/ext/LinearAlgebraExt.jl diff --git a/base/Base.jl b/base/Base.jl index 9800462f855f9..5fb764bd4cc01 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -646,7 +646,6 @@ function __init__() init_load_path() init_active_project() append!(empty!(_sysimage_modules), keys(loaded_modules)) - empty!(explicit_loaded_modules) empty!(loaded_precompiles) # If we load a packageimage when building the image this might not be empty for (mod, key) in module_keys push!(get!(Vector{Module}, loaded_precompiles, key), mod) diff --git a/base/loading.jl b/base/loading.jl index db6a681bb2a5b..19fcaba388d11 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1554,7 +1554,7 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any} # TODO: Better error message if this lookup fails? uuid_trigger = UUID(totaldeps[trigger]::String) trigger_id = PkgId(uuid_trigger, trigger) - if !haskey(explicit_loaded_modules, trigger_id) || haskey(package_locks, trigger_id) + if !haskey(Base.loaded_modules, trigger_id) || haskey(package_locks, trigger_id) trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, trigger_id) push!(trigger1, gid) else @@ -2430,9 +2430,8 @@ function __require_prelocked(uuidkey::PkgId, env=nothing) insert_extension_triggers(uuidkey) # After successfully loading, notify downstream consumers run_package_callbacks(uuidkey) - elseif !haskey(explicit_loaded_modules, uuidkey) - explicit_loaded_modules[uuidkey] = m - run_package_callbacks(uuidkey) + else + newm = root_module(uuidkey) end return m end @@ -2445,7 +2444,6 @@ end PkgOrigin() = PkgOrigin(nothing, nothing, nothing) const pkgorigins = Dict{PkgId,PkgOrigin}() -const explicit_loaded_modules = Dict{PkgId,Module}() # Emptied on Julia start const loaded_modules = Dict{PkgId,Module}() # available to be explicitly loaded const loaded_precompiles = Dict{PkgId,Vector{Module}}() # extended (complete) list of modules, available to be loaded const loaded_modules_order = Vector{Module}() @@ -2485,7 +2483,6 @@ end end maybe_loaded_precompile(key, module_build_id(m)) === nothing && push!(loaded_modules_order, m) loaded_modules[key] = m - explicit_loaded_modules[key] = m module_keys[m] = key end nothing diff --git a/test/loading.jl b/test/loading.jl index ec4a0391a412a..ecba64ca45a73 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1129,25 +1129,6 @@ end run(cmd_proj_ext) end - # Sysimage extensions - # The test below requires that LinearAlgebra is in the sysimage and that it has not been loaded yet. - # if it gets moved out, this test will need to be updated. - # We run this test in a new process so we are not vulnerable to a previous test having loaded LinearAlgebra - sysimg_ext_test_code = """ - uuid_key = Base.PkgId(Base.UUID("37e2e46d-f89d-539d-b4ee-838fcccc9c8e"), "LinearAlgebra") - Base.in_sysimage(uuid_key) || error("LinearAlgebra not in sysimage") - haskey(Base.explicit_loaded_modules, uuid_key) && error("LinearAlgebra already loaded") - using HasExtensions - Base.get_extension(HasExtensions, :LinearAlgebraExt) === nothing || error("unexpectedly got an extension") - using LinearAlgebra - haskey(Base.explicit_loaded_modules, uuid_key) || error("LinearAlgebra not loaded") - Base.get_extension(HasExtensions, :LinearAlgebraExt) isa Module || error("expected extension to load") - """ - cmd = `$(Base.julia_cmd()) --startup-file=no -e $sysimg_ext_test_code` - cmd = addenv(cmd, "JULIA_LOAD_PATH" => join([proj, "@stdlib"], sep)) - run(cmd) - - # Extensions in implicit environments old_load_path = copy(LOAD_PATH) try diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml index 5706aba59d1e0..f659a59e0910b 100644 --- a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml +++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml @@ -25,17 +25,12 @@ deps = ["ExtDep3"] path = "../HasExtensions.jl" uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" version = "0.1.0" +weakdeps = ["ExtDep", "ExtDep2"] [deps.HasExtensions.extensions] Extension = "ExtDep" ExtensionDep = "ExtDep3" ExtensionFolder = ["ExtDep", "ExtDep2"] - LinearAlgebraExt = "LinearAlgebra" - - [deps.HasExtensions.weakdeps] - ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" - ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" - LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.SomePackage]] path = "../SomePackage" diff --git a/test/project/Extensions/HasExtensions.jl/Project.toml b/test/project/Extensions/HasExtensions.jl/Project.toml index fe21a1423f543..a02f5662d602d 100644 --- a/test/project/Extensions/HasExtensions.jl/Project.toml +++ b/test/project/Extensions/HasExtensions.jl/Project.toml @@ -8,10 +8,8 @@ ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1" [weakdeps] ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [extensions] Extension = "ExtDep" ExtensionDep = "ExtDep3" ExtensionFolder = ["ExtDep", "ExtDep2"] -LinearAlgebraExt = "LinearAlgebra" diff --git a/test/project/Extensions/HasExtensions.jl/ext/LinearAlgebraExt.jl b/test/project/Extensions/HasExtensions.jl/ext/LinearAlgebraExt.jl deleted file mode 100644 index 19f87cb849417..0000000000000 --- a/test/project/Extensions/HasExtensions.jl/ext/LinearAlgebraExt.jl +++ /dev/null @@ -1,3 +0,0 @@ -module LinearAlgebraExt - -end From ad1dc390e3123b65433ee06a651ca6de88c29914 Mon Sep 17 00:00:00 2001 From: KristofferC Date: Mon, 21 Oct 2024 14:18:48 +0200 Subject: [PATCH 24/47] fix lookup when extension is in `[deps]` --- base/loading.jl | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 19fcaba388d11..e08d03ad513c7 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -974,14 +974,14 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St entry = entry::Dict{String, Any} uuid = get(entry, "uuid", nothing)::Union{String, Nothing} uuid === nothing && continue + # deps is either a list of names (deps = ["DepA", "DepB"]) or + # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} + deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if UUID(uuid) === where.uuid found_where = true - # deps is either a list of names (deps = ["DepA", "DepB"]) or - # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} - deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if deps isa Vector{String} found_name = name in deps - break + found_name && @goto done elseif deps isa Dict{String, Any} deps = deps::Dict{String, Any} for (dep, uuid) in deps @@ -1000,23 +1000,25 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St return PkgId(UUID(uuid), name) end exts = extensions[where.name]::Union{String, Vector{String}} + weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if (exts isa String && name == exts) || (exts isa Vector{String} && name in exts) - weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} - if weakdeps !== nothing - if weakdeps isa Vector{String} - found_name = name in weakdeps - break - elseif weakdeps isa Dict{String, Any} - weakdeps = weakdeps::Dict{String, Any} - for (dep, uuid) in weakdeps - uuid::String - if dep === name - return PkgId(UUID(uuid), name) + for deps′ in [weakdeps, deps] + if deps′ !== nothing + if deps′ isa Vector{String} + found_name = name in deps′ + found_name && @goto done + elseif deps′ isa Dict{String, Any} + deps′ = deps′::Dict{String, Any} + for (dep, uuid) in deps′ + uuid::String + if dep === name + return PkgId(UUID(uuid), name) + end + end end end end end - end # `name` is not an ext, do standard lookup as if this was the parent return identify_package(PkgId(UUID(uuid), dep_name), name) end @@ -1024,6 +1026,7 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St end end end + @label done found_where || return nothing found_name || return PkgId(name) # Only reach here if deps was not a dict which mean we have a unique name for the dep From 1c67d0cfdc8ab109120dc3f0720053e509a10131 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 21 Oct 2024 08:21:44 -0400 Subject: [PATCH 25/47] REPL: fix brace detection when ' is used for transpose (#56252) --- stdlib/REPL/src/REPLCompletions.jl | 9 +++++++-- stdlib/REPL/test/replcompletions.jl | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 5e80e17036559..42480aea91605 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -480,6 +480,7 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') i = firstindex(r) braces = in_comment = 0 in_single_quotes = in_double_quotes = in_back_ticks = false + num_single_quotes_in_string = count('\'', s) while i <= ncodeunits(r) c, i = iterate(r, i) if c == '#' && i <= ncodeunits(r) && iterate(r, i)[1] == '=' @@ -502,7 +503,9 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') braces += 1 elseif c == c_end braces -= 1 - elseif c == '\'' + elseif c == '\'' && num_single_quotes_in_string % 2 == 0 + # ' can be a transpose too, so check if there are even number of 's in the string + # TODO: This probably needs to be more robust in_single_quotes = true elseif c == '"' in_double_quotes = true @@ -1197,7 +1200,9 @@ function complete_identifiers!(suggestions::Vector{Completion}, if !isinfix # Handle infix call argument completion of the form bar + foo(qux). frange, end_of_identifier = find_start_brace(@view s[1:prevind(s, end)]) - isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == prefix.args[end] + if !isempty(frange) # if find_start_brace fails to find the brace just continue + isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == prefix.args[end] + end end if isinfix prefix = prefix.args[end] diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index 8bee70226755f..cfb9a6137a287 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -340,6 +340,12 @@ end # inexistent completion inside a cmd @test_nocompletion("run(`lol") +# issue 55856: copy(A'). errors in the REPL +let + c, r = test_complete("copy(A').") + @test isempty(c) +end + # test latex symbol completions let s = "\\alpha" c, r = test_bslashcomplete(s) From 319ee70104fa4f50b08c57fc202d5ab8d26f0d6b Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 21 Oct 2024 14:40:37 +0200 Subject: [PATCH 26/47] remove new references to explicit_loaded_modules --- base/loading.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index e08d03ad513c7..ef8837b36a1ca 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2517,9 +2517,6 @@ loaded_modules_array() = @lock require_lock copy(loaded_modules_order) # after unreference_module, a subsequent require call will try to load a new copy of it, if stale # reload(m) = (unreference_module(m); require(m)) function unreference_module(key::PkgId) - if haskey(explicit_loaded_modules, key) - m = pop!(explicit_loaded_modules, key) - end if haskey(loaded_modules, key) m = pop!(loaded_modules, key) # need to ensure all modules are GC rooted; will still be referenced @@ -3125,7 +3122,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # build up the list of modules that we want the precompile process to preserve if keep_loaded_modules concrete_deps = copy(_concrete_dependencies) - for (pkgreq, modreq) in loaded_modules # TODO: convert all relevant staleness heuristics to use explicit_loaded_modules instead + for (pkgreq, modreq) in loaded_modules if !(pkgreq === Main || pkgreq === Core || pkgreq === Base) push!(concrete_deps, pkgreq => module_build_id(modreq)) end From 8bdacc341a740b73d5e11b3ba548cd97bebab6c6 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 21 Oct 2024 09:17:03 -0400 Subject: [PATCH 27/47] Add basic infrastructure for binding replacement (#56224) Now that I've had a few months to recover from the slog of adding `BindingPartition`, it's time to renew my quest to finish #54654. This adds the basic infrastructure for having multiple partitions, including making the lookup respect the `world` argument - on-demand allocation of missing partitions, `Base.delete_binding` and the `@world` macro. Not included is any inference or invalidation support, or any support for the runtime to create partitions itself (only `Base.delete_binding` does that for now), which will come in subsequent PRs. --- base/essentials.jl | 44 +++++++++++++++++ base/range.jl | 11 +++++ base/runtime_internals.jl | 27 +++++++++-- base/show.jl | 18 +++++++ src/clangsa/GCChecker.cpp | 2 + src/julia.h | 11 +++-- src/julia_internal.h | 15 ++---- src/module.c | 99 +++++++++++++++++++++++++++++++-------- src/staticdata.c | 9 ++-- src/toplevel.c | 2 + test/choosetests.jl | 2 +- test/rebinding.jl | 18 +++++++ 12 files changed, 214 insertions(+), 44 deletions(-) create mode 100644 test/rebinding.jl diff --git a/base/essentials.jl b/base/essentials.jl index 0e7be924c908c..a07aaa6769ed2 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -1250,6 +1250,50 @@ function isiterable(T)::Bool return hasmethod(iterate, Tuple{T}) end +""" + @world(sym, world) + +Resolve the binding `sym` in world `world`. See [`invoke_in_world`](@ref) for running +arbitrary code in fixed worlds. `world` may be `UnitRange`, in which case the macro +will error unless the binding is valid and has the same value across the entire world +range. + +The `@world` macro is primarily used in the priniting of bindings that are no longer available +in the current world. + +## Example +``` +julia> struct Foo; a::Int; end +Foo + +julia> fold = Foo(1) + +julia> Int(Base.get_world_counter()) +26866 + +julia> struct Foo; a::Int; b::Int end +Foo + +julia> fold +@world(Foo, 26866)(1) +``` + +!!! compat "Julia 1.12" + This functionality requires at least Julia 1.12. +""" +macro world(sym, world) + if isa(sym, Symbol) + return :($(_resolve_in_world)($world, $(QuoteNode(GlobalRef(__module__, sym))))) + elseif isa(sym, GlobalRef) + return :($(_resolve_in_world)($world, $(QuoteNode(sym)))) + else + error("`@world` requires a symbol or GlobalRef") + end +end + +_resolve_in_world(world::Integer, gr::GlobalRef) = + invoke_in_world(UInt(world), Core.getglobal, gr.mod, gr.name) + # Special constprop heuristics for various binary opes typename(typeof(function + end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC typename(typeof(function - end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC diff --git a/base/range.jl b/base/range.jl index 4b5d076dcf436..3301335785878 100644 --- a/base/range.jl +++ b/base/range.jl @@ -1680,3 +1680,14 @@ function show(io::IO, r::LogRange{T}) where {T} show(io, length(r)) print(io, ')') end + +# Implementation detail of @world +# The rest of this is defined in essentials.jl, but UnitRange is not available +function _resolve_in_world(worlds::UnitRange, gr::GlobalRef) + # Validate that this binding's reference covers the entire world range + bpart = lookup_binding_partition(first(worlds), gr) + if bpart.max_world < last(world) + error("Binding does not cover the full world range") + end + _resolve_in_world(last(world), gr) +end diff --git a/base/runtime_internals.jl b/base/runtime_internals.jl index 645aa55c538b4..ab867f8fcae6d 100644 --- a/base/runtime_internals.jl +++ b/base/runtime_internals.jl @@ -218,9 +218,10 @@ function _fieldnames(@nospecialize t) return t.name.names end -const BINDING_KIND_GLOBAL = 0x0 -const BINDING_KIND_CONST = 0x1 -const BINDING_KIND_CONST_IMPORT = 0x2 +# N.B.: Needs to be synced with julia.h +const BINDING_KIND_CONST = 0x0 +const BINDING_KIND_CONST_IMPORT = 0x1 +const BINDING_KIND_GLOBAL = 0x2 const BINDING_KIND_IMPLICIT = 0x3 const BINDING_KIND_EXPLICIT = 0x4 const BINDING_KIND_IMPORTED = 0x5 @@ -228,6 +229,8 @@ const BINDING_KIND_FAILED = 0x6 const BINDING_KIND_DECLARED = 0x7 const BINDING_KIND_GUARD = 0x8 +is_some_const_binding(kind::UInt8) = (kind == BINDING_KIND_CONST || kind == BINDING_KIND_CONST_IMPORT) + function lookup_binding_partition(world::UInt, b::Core.Binding) ccall(:jl_get_binding_partition, Ref{Core.BindingPartition}, (Any, UInt), b, world) end @@ -236,9 +239,27 @@ function lookup_binding_partition(world::UInt, gr::Core.GlobalRef) ccall(:jl_get_globalref_partition, Ref{Core.BindingPartition}, (Any, UInt), gr, world) end +partition_restriction(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_restriction_value, Any, (Any,), bpart) + binding_kind(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_kind, UInt8, (Any,), bpart) binding_kind(m::Module, s::Symbol) = binding_kind(lookup_binding_partition(tls_world_age(), GlobalRef(m, s))) +""" + delete_binding(mod::Module, sym::Symbol) + +Force the binding `mod.sym` to be undefined again, allowing it be redefined. +Note that this operation is very expensive, requirinig a full scan of all code in the system, +as well as potential recompilation of any methods that (may) have used binding +information. + +!!! warning + The implementation of this functionality is currently incomplete. Do not use + this method on versions that contain this disclaimer except for testing. +""" +function delete_binding(mod::Module, sym::Symbol) + ccall(:jl_disable_binding, Cvoid, (Any,), GlobalRef(mod, sym)) +end + """ fieldname(x::DataType, i::Integer) diff --git a/base/show.jl b/base/show.jl index 25ed99f50b5b0..3aeb267b4a696 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1035,6 +1035,21 @@ function is_global_function(tn::Core.TypeName, globname::Union{Symbol,Nothing}) return false end +function check_world_bounded(tn::Core.TypeName) + bnd = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), tn.module, tn.name, true) + isdefined(bnd, :partitions) || return nothing + partition = @atomic bnd.partitions + while true + if is_some_const_binding(binding_kind(partition)) && partition_restriction(partition) <: tn.wrapper + max_world = @atomic partition.max_world + max_world == typemax(UInt) && return nothing + return Int(partition.min_world):Int(max_world) + end + isdefined(partition, :next) || return nothing + partition = @atomic partition.next + end +end + function show_type_name(io::IO, tn::Core.TypeName) if tn === UnionAll.name # by coincidence, `typeof(Type)` is a valid representation of the UnionAll type. @@ -1063,7 +1078,10 @@ function show_type_name(io::IO, tn::Core.TypeName) end end end + world = check_world_bounded(tn) + world !== nothing && print(io, "@world(") show_sym(io, sym) + world !== nothing && print(io, ", ", world, ")") quo && print(io, ")") globfunc && print(io, ")") nothing diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index 4892ebdabd110..830fe322a0a38 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -824,6 +824,7 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.ends_with_insensitive("jl_tupletype_t") || Name.ends_with_insensitive("jl_gc_tracked_buffer_t") || Name.ends_with_insensitive("jl_binding_t") || + Name.ends_with_insensitive("jl_binding_partition_t") || Name.ends_with_insensitive("jl_ordereddict_t") || Name.ends_with_insensitive("jl_tvar_t") || Name.ends_with_insensitive("jl_typemap_t") || @@ -847,6 +848,7 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.ends_with_insensitive("jl_stenv_t") || Name.ends_with_insensitive("jl_varbinding_t") || Name.ends_with_insensitive("set_world") || + Name.ends_with_insensitive("jl_ptr_kind_union_t") || Name.ends_with_insensitive("jl_codectx_t")) { return true; } diff --git a/src/julia.h b/src/julia.h index 168ba0deff1ec..dd79dbb82c28d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -620,6 +620,7 @@ typedef struct _jl_weakref_t { jl_value_t *value; } jl_weakref_t; +// N.B: Needs to be synced with runtime_internals.jl enum jl_partition_kind { // Constant: This binding partition is a constant declared using `const` // ->restriction holds the constant value @@ -684,7 +685,7 @@ typedef struct __attribute__((aligned(8))) _jl_binding_partition_t { _Atomic(jl_ptr_kind_union_t) restriction; size_t min_world; _Atomic(size_t) max_world; - _Atomic(struct _jl_binding_partition_t*) next; + _Atomic(struct _jl_binding_partition_t *) next; size_t reserved; // Reserved for ->kind. Currently this holds the low bits of ->restriction during serialization } jl_binding_partition_t; @@ -1845,8 +1846,8 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_sym_t *jl_gensym(void); JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len); JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void); -JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT); JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name); JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module); JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world, jl_code_instance_t **cache); @@ -2008,8 +2009,8 @@ JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_s JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs); JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs); JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED); -JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind); JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from); JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s); JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname); diff --git a/src/julia_internal.h b/src/julia_internal.h index 4f735029da444..8c4ee9fca36e0 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -888,13 +888,10 @@ EXTERN_INLINE_DECLARE enum jl_partition_kind decode_restriction_kind(jl_ptr_kind #endif } -STATIC_INLINE jl_value_t *decode_restriction_value(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT +STATIC_INLINE jl_value_t *decode_restriction_value(jl_ptr_kind_union_t JL_PROPAGATES_ROOT pku) JL_NOTSAFEPOINT { #ifdef _P64 jl_value_t *val = (jl_value_t*)(pku & ~0x7); - // This is a little bit of a lie at the moment - it is one of the things that - // can go wrong with binding replacement. - JL_GC_PROMISE_ROOTED(val); return val; #else return pku.val; @@ -928,14 +925,8 @@ STATIC_INLINE int jl_bkind_is_some_guard(enum jl_partition_kind kind) JL_NOTSAFE return kind == BINDING_KIND_FAILED || kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED; } -EXTERN_INLINE_DECLARE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT { - if (!b) - return NULL; - assert(jl_is_binding(b)); - return jl_atomic_load_relaxed(&b->partitions); -} - -JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world); +JL_DLLEXPORT jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b JL_PROPAGATES_ROOT, size_t world); +JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr JL_PROPAGATES_ROOT, size_t world); EXTERN_INLINE_DECLARE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT { return decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)); diff --git a/src/module.c b/src/module.c index 8dbac950235ee..f1098e22ff522 100644 --- a/src/module.c +++ b/src/module.c @@ -13,10 +13,51 @@ extern "C" { #endif // In this translation unit and this translation unit only emit this symbol `extern` for use by julia -EXTERN_INLINE_DEFINE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT; EXTERN_INLINE_DEFINE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT; extern inline enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT; +static jl_binding_partition_t *new_binding_partition(void) +{ + jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type); + jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD)); + bpart->min_world = 0; + jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1); + jl_atomic_store_relaxed(&bpart->next, NULL); +#ifdef _P64 + bpart->reserved = 0; +#endif + return bpart; +} + +jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) { + if (!b) + return NULL; + assert(jl_is_binding(b)); + jl_value_t *parent = (jl_value_t*)b; + _Atomic(jl_binding_partition_t *)*insert = &b->partitions; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(insert); + size_t max_world = (size_t)-1; + while (1) { + while (bpart && world < bpart->min_world) { + insert = &bpart->next; + max_world = bpart->min_world - 1; + parent = (jl_value_t *)bpart; + bpart = jl_atomic_load_relaxed(&bpart->next); + } + if (bpart && world <= jl_atomic_load_relaxed(&bpart->max_world)) + return bpart; + jl_binding_partition_t *new_bpart = new_binding_partition(); + jl_atomic_store_relaxed(&new_bpart->next, bpart); + if (bpart) + new_bpart->min_world = jl_atomic_load_relaxed(&bpart->max_world) + 1; + jl_atomic_store_relaxed(&new_bpart->max_world, max_world); + if (jl_atomic_cmpswap(insert, &bpart, new_bpart)) { + jl_gc_wb(parent, new_bpart); + return new_bpart; + } + } +} + JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world) { if (!gr) @@ -188,19 +229,6 @@ static jl_globalref_t *jl_new_globalref(jl_module_t *mod, jl_sym_t *name, jl_bin return g; } -static jl_binding_partition_t *new_binding_partition(void) -{ - jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type); - jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD)); - bpart->min_world = 0; - jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1); - jl_atomic_store_relaxed(&bpart->next, NULL); -#ifdef _P64 - bpart->reserved = 0; -#endif - return bpart; -} - static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name) { jl_task_t *ct = jl_current_task; @@ -215,9 +243,7 @@ static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name) JL_GC_PUSH1(&b); b->globalref = jl_new_globalref(mod, name, b); jl_gc_wb(b, b->globalref); - jl_binding_partition_t *bpart = new_binding_partition(); - jl_atomic_store_relaxed(&b->partitions, bpart); - jl_gc_wb(b, bpart); + jl_atomic_store_relaxed(&b->partitions, NULL); JL_GC_POP(); return b; } @@ -324,6 +350,12 @@ JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b) return decode_restriction_value(pku); } +JL_DLLEXPORT jl_value_t *jl_bpart_get_restriction_value(jl_binding_partition_t *bpart) +{ + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + return decode_restriction_value(pku); +} + typedef struct _modstack_t { jl_module_t *m; jl_sym_t *var; @@ -947,6 +979,28 @@ JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var jl_gc_wb(bpart, val); } +extern jl_mutex_t world_counter_lock; +JL_DLLEXPORT void jl_disable_binding(jl_globalref_t *gr) +{ + jl_binding_t *b = gr->binding; + b = jl_resolve_owner(b, gr->mod, gr->name, NULL); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + + if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GUARD) { + // Already guard + return; + } + + JL_LOCK(&world_counter_lock); + jl_task_t *ct = jl_current_task; + size_t new_max_world = jl_atomic_load_acquire(&jl_world_counter); + // TODO: Trigger invalidation here + (void)ct; + jl_atomic_store_release(&bpart->max_world, new_max_world); + jl_atomic_store_release(&jl_world_counter, new_max_world + 1); + JL_UNLOCK(&world_counter_lock); +} + JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr) { jl_binding_t *b = gr->binding; @@ -1018,13 +1072,17 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b jl_value_t *jl_check_binding_wr(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, int reassign) { + JL_GC_PUSH1(&rhs); // callee-rooted jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku))); if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) { jl_value_t *old = decode_restriction_value(pku); - if (jl_egal(rhs, old)) + JL_GC_PROMISE_ROOTED(old); + if (jl_egal(rhs, old)) { + JL_GC_POP(); return NULL; + } if (jl_typeof(rhs) == jl_typeof(old)) jl_errorf("invalid redefinition of constant %s.%s. This redefinition may be permitted using the `const` keyword.", jl_symbol_name(mod->name), jl_symbol_name(var)); @@ -1033,13 +1091,13 @@ jl_value_t *jl_check_binding_wr(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t jl_symbol_name(mod->name), jl_symbol_name(var)); } jl_value_t *old_ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old_ty); if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) { - JL_GC_PUSH1(&rhs); // callee-rooted if (!jl_isa(rhs, old_ty)) jl_errorf("cannot assign an incompatible value to the global %s.%s.", jl_symbol_name(mod->name), jl_symbol_name(var)); - JL_GC_POP(); } + JL_GC_POP(); return old_ty; } @@ -1076,6 +1134,7 @@ JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl jl_errorf("invalid redefinition of constant %s.%s", jl_symbol_name(mod->name), jl_symbol_name(var)); jl_value_t *ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(ty); return modify_value(ty, &b->value, (jl_value_t*)b, op, rhs, 1, mod, var); } diff --git a/src/staticdata.c b/src/staticdata.c index af4527cbc143f..af3477a25128e 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3883,9 +3883,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl if ((jl_value_t*)b == jl_nothing) continue; jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions); - jl_atomic_store_relaxed(&bpart->restriction, - encode_restriction((jl_value_t*)jl_atomic_load_relaxed(&bpart->restriction), bpart->reserved)); - bpart->reserved = 0; + while (bpart) { + jl_atomic_store_relaxed(&bpart->restriction, + encode_restriction((jl_value_t*)jl_atomic_load_relaxed(&bpart->restriction), bpart->reserved)); + bpart->reserved = 0; + bpart = jl_atomic_load_relaxed(&bpart->next); + } } #endif } diff --git a/src/toplevel.c b/src/toplevel.c index 8caa8b086ec00..6f2e0cf77568a 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -318,6 +318,7 @@ void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_va jl_symbol_name(mod->name), jl_symbol_name(sym)); } jl_value_t *old_ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old_ty); if (!jl_types_equal(ty, old_ty)) { jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.", jl_symbol_name(mod->name), jl_symbol_name(sym)); @@ -738,6 +739,7 @@ JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b, j if (!val) return bpart; jl_value_t *old = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old); if (jl_egal(val, old)) break; if (!did_warn) { diff --git a/test/choosetests.jl b/test/choosetests.jl index 96d230d185c71..affdee412bd86 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -29,7 +29,7 @@ const TESTNAMES = [ "channels", "iostream", "secretbuffer", "specificity", "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", "smallarrayshrink", "opaque_closure", "filesystem", "download", - "scopedvalues", "compileall" + "scopedvalues", "compileall", "rebinding" ] const INTERNET_REQUIRED_LIST = [ diff --git a/test/rebinding.jl b/test/rebinding.jl new file mode 100644 index 0000000000000..4066d91bc4b9b --- /dev/null +++ b/test/rebinding.jl @@ -0,0 +1,18 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module Rebinding + using Test + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_GUARD + struct Foo + x::Int + end + x = Foo(1) + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_CONST + @test !contains(repr(x), "@world") + Base.delete_binding(@__MODULE__, :Foo) + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_GUARD + @test contains(repr(x), "@world") +end From 82e0e28d76621888f6e501033c59549cb0104bac Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Mon, 21 Oct 2024 19:48:49 +0530 Subject: [PATCH 28/47] Specialize `haszero` for `Union{Missing,<:Number}` (#56169) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since `zero(::Union{Missing,T})` calls `zero(T)` internally, we may use the same logic for `LinearAlgebra.haszero`. This helps with certain structured matrix operations: ```julia julia> M = Matrix{Union{Int,Missing}}(missing,2,2) 2×2 Matrix{Union{Missing, Int64}}: missing missing missing missing julia> triu(M) 2×2 Matrix{Union{Missing, Int64}}: missing missing 0 missing ``` whereas previously, this would have been ```julia julia> triu(M) 2×2 Matrix{Union{Missing, Int64}}: missing missing missing missing ``` --- stdlib/LinearAlgebra/src/dense.jl | 1 + stdlib/LinearAlgebra/test/triangular.jl | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index aacc5479bfa9d..d8f2513f5bfc8 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -110,6 +110,7 @@ norm2(x::Union{Array{T},StridedVector{T}}) where {T<:BlasFloat} = # Conservative assessment of types that have zero(T) defined for themselves haszero(::Type) = false haszero(::Type{T}) where {T<:Number} = isconcretetype(T) +haszero(::Type{Union{Missing,T}}) where {T<:Number} = haszero(T) @propagate_inbounds _zero(M::AbstractArray{T}, inds...) where {T} = haszero(T) ? zero(T) : zero(M[inds...]) """ diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl index 7acb3cbfc0c57..2ceda735dfd0a 100644 --- a/stdlib/LinearAlgebra/test/triangular.jl +++ b/stdlib/LinearAlgebra/test/triangular.jl @@ -1284,6 +1284,14 @@ end @test istril(U, k) == istril(A, k) end end + + @testset "Union eltype" begin + M = Matrix{Union{Int,Missing}}(missing,2,2) + U = triu(M) + @test iszero(U[2,1]) + U = tril(M) + @test iszero(U[1,2]) + end end @testset "indexing with a BandIndex" begin From 6d7e29f33018b8750ac1f2cf446a51910d25a1d7 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 21 Oct 2024 11:24:39 -0300 Subject: [PATCH 29/47] Add small tweaks to juliac script and misc fixes to juliac (#56119) This comments out an assert thats currently faulty and also marks apply_iterate as safe when we can special case it to not dispatch --- contrib/juliac-buildscript.jl | 22 ++++++++++++------ contrib/juliac.jl | 6 +++-- src/codegen.cpp | 42 ++++++++++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 14 deletions(-) diff --git a/contrib/juliac-buildscript.jl b/contrib/juliac-buildscript.jl index 490bca86e1cba..0303e95f448b5 100644 --- a/contrib/juliac-buildscript.jl +++ b/contrib/juliac-buildscript.jl @@ -27,6 +27,7 @@ end (f::Base.RedirectStdStream)(io::Core.CoreSTDOUT) = Base._redirect_io_global(io, f.unix_fd) @eval Base begin + depwarn(msg, funcsym; force::Bool=false) = nothing _assert_tostring(msg) = "" reinit_stdio() = nothing JuliaSyntax.enable_in_core!() = nothing @@ -229,20 +230,15 @@ let loaded = Symbol.(Base.loaded_modules_array()) # TODO better way to do this using Artifacts @eval Artifacts begin function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, _::Val{lazyartifacts}) where lazyartifacts - moduleroot = Base.moduleroot(__module__) - if haskey(Base.module_keys, moduleroot) - # Process overrides for this UUID, if we know what it is - process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid) - end - # If the artifact exists, we're in the happy path and we can immediately # return the path to the artifact: - dirs = artifact_paths(hash; honor_overrides=true) + dirs = artifacts_dirs(bytes2hex(hash.bytes)) for dir in dirs if isdir(dir) return jointail(dir, path_tail) end end + error("Artifact not found") end end end @@ -258,6 +254,18 @@ let loaded = Symbol.(Base.loaded_modules_array()) # TODO better way to do this __init__() = rand() end end + if :Markdown in loaded + using Markdown + @eval Markdown begin + __init__() = rand() + end + end + if :JuliaSyntaxHighlighting in loaded + using JuliaSyntaxHighlighting + @eval JuliaSyntaxHighlighting begin + __init__() = rand() + end + end end empty!(Core.ARGS) diff --git a/contrib/juliac.jl b/contrib/juliac.jl index 61e0e91958667..0f008976d2b4f 100644 --- a/contrib/juliac.jl +++ b/contrib/juliac.jl @@ -8,6 +8,7 @@ trim = nothing outname = nothing file = nothing add_ccallables = false +verbose = false help = findfirst(x->x == "--help", ARGS) if help !== nothing @@ -39,6 +40,8 @@ let i = 1 end elseif arg == "--compile-ccallable" global add_ccallables = true + elseif arg == "--verbose" + global verbose = true else if arg[1] == '-' || !isnothing(file) println("Unexpected argument `$arg`") @@ -77,9 +80,8 @@ open(initsrc_path, "w") do io end static_call_graph_arg() = isnothing(trim) ? `` : `--trim=$(trim)` -is_verbose() = verbose ? `--verbose-compilation=yes` : `` cmd = addenv(`$cmd --project=$(Base.active_project()) --output-o $img_path --output-incremental=no --strip-ir --strip-metadata $(static_call_graph_arg()) $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1) - +verbose && println("Running: $cmd") if !success(pipeline(cmd; stdout, stderr)) println(stderr, "\nFailed to compile $file") exit(1) diff --git a/src/codegen.cpp b/src/codegen.cpp index 0ab26a65fcaaa..eaa3cc8176ad5 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2247,7 +2247,10 @@ static jl_array_t* build_stack_crumbs(jl_codectx_t &ctx) JL_NOTSAFEPOINT break; } if (caller) { - assert(ctx.emission_context.enqueuers.count(caller) == 1); + + // assert(ctx.emission_context.enqueuers.count(caller) == 1); + // Each enqueuer should only be enqueued at least once and only once. Check why this assert is triggering + // This isn't a fatal error, just means that we may get a wrong backtrace if (jl_is_method_instance(caller)) { //TODO: Use a subrange when C++20 is a thing for (auto it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) { @@ -5732,10 +5735,34 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo // special case for some known builtin not handled by emit_builtin_call auto it = builtin_func_map().find(builtin_fptr); if (it != builtin_func_map().end()) { - if (trim_may_error(ctx.params->trim) && may_dispatch_builtins().count(builtin_fptr)) { - errs() << "ERROR: Dynamic call to builtin" << jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name); - errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; - print_stacktrace(ctx, ctx.params->trim); + if (trim_may_error(ctx.params->trim)) { + bool may_dispatch = may_dispatch_builtins().count(builtin_fptr); + if (may_dispatch && f.constant == jl_builtin__apply_iterate && nargs >= 4) { + if (jl_subtype(argv[2].typ, (jl_value_t*)jl_builtin_type)) { + static jl_value_t *jl_dispatchfree_apply_iterate_type = NULL; + if (!jl_dispatchfree_apply_iterate_type) { + jl_value_t *types[5] = { + (jl_value_t *)jl_simplevector_type, + (jl_value_t *)jl_genericmemory_type, + (jl_value_t *)jl_array_type, + (jl_value_t *)jl_tuple_type, + (jl_value_t *)jl_namedtuple_type, + }; + jl_dispatchfree_apply_iterate_type = jl_as_global_root(jl_type_union(types, 5), 1); + } + for (size_t i = 3; i < nargs; i++) { + auto ai = argv[i].typ; + if (!jl_subtype(ai, jl_dispatchfree_apply_iterate_type)) + break; + } + may_dispatch = false; + } + } + if (may_dispatch) { + errs() << "ERROR: Dynamic call to builtin " << jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } } Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef(argv).drop_front(), nargs - 1, julia_call); setName(ctx.emission_context, ret, it->second->name + "_ret"); @@ -5752,6 +5779,11 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo fptr = FunctionCallee(get_func_sig(ctx.builder.getContext()), ctx.builder.CreateCall(prepare_call(jlgetbuiltinfptr_func), {emit_typeof(ctx, f)})); cc = julia_call; } + if (trim_may_error(ctx.params->trim)) { + errs() << "ERROR: Dynamic call to unknown builtin"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } Value *ret = emit_jlcall(ctx, fptr, nullptr, argv, nargs, cc); setName(ctx.emission_context, ret, "Builtin_ret"); return mark_julia_type(ctx, ret, true, rt); From 11ef8eb0be7b411ab8c2813789adf63af4793798 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Mon, 21 Oct 2024 20:13:53 +0530 Subject: [PATCH 30/47] Warn about negative size in array construction (#56262) After this, ```julia julia> zeros(-1) ERROR: ArgumentError: invalid GenericMemory size: the number of elements is either negative or too large for system address width [...] ``` The error message is updated to warn about possible negative sizes when creating arrays. Fixes https://github.com/JuliaLang/julia/issues/55446 --- src/genericmemory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/genericmemory.c b/src/genericmemory.c index ea52fca66ba48..5c48e3202493e 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -45,7 +45,7 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is prod += nel; } if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: too large for system address width"); + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); size_t tot = (size_t)prod + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); int pooled = tot <= GC_MAX_SZCLASS; From cba1cc022b05eef93f7145aeb363a6ab2c9c3e6c Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Mon, 21 Oct 2024 20:28:39 +0530 Subject: [PATCH 31/47] Revert "Reroute` (Upper/Lower)Triangular * Diagonal` through `__muldiag`" (#56267) Reverts JuliaLang/julia#55984 This PR was buggy, but the test failures as seen in https://buildkite.com/julialang/julia-master/builds/41300#0192adab-9d07-4900-8592-2d46aff26905 were not caught in the CI run for the PR as the tests are run randomly. Let's revert this for now. --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 2 - stdlib/LinearAlgebra/src/diagonal.jl | 158 +++++++++------------- stdlib/LinearAlgebra/test/diagonal.jl | 40 +----- 3 files changed, 66 insertions(+), 134 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index 88fc3476c9d7f..15354603943c2 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -655,8 +655,6 @@ matprod_dest(A::StructuredMatrix, B::Diagonal, TS) = _matprod_dest_diag(A, TS) matprod_dest(A::Diagonal, B::StructuredMatrix, TS) = _matprod_dest_diag(B, TS) matprod_dest(A::Diagonal, B::Diagonal, TS) = _matprod_dest_diag(B, TS) _matprod_dest_diag(A, TS) = similar(A, TS) -_matprod_dest_diag(A::UnitUpperTriangular, TS) = UpperTriangular(similar(parent(A), TS)) -_matprod_dest_diag(A::UnitLowerTriangular, TS) = LowerTriangular(similar(parent(A), TS)) function _matprod_dest_diag(A::SymTridiagonal, TS) n = size(A, 1) ev = similar(A, TS, max(0, n-1)) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 8ba4c3d457e83..6e8ce96259fc1 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -396,120 +396,82 @@ function lmul!(D::Diagonal, T::Tridiagonal) return T end -@inline function __muldiag_nonzeroalpha!(out, D::Diagonal, B, _add::MulAddMul) - @inbounds for j in axes(B, 2) - @simd for i in axes(B, 1) - _modify!(_add, D.diag[i] * B[i,j], out, (i,j)) - end - end - out -end -_maybe_unwrap_tri(out, A) = out, A -_maybe_unwrap_tri(out::UpperTriangular, A::UpperOrUnitUpperTriangular) = parent(out), parent(A) -_maybe_unwrap_tri(out::LowerTriangular, A::LowerOrUnitLowerTriangular) = parent(out), parent(A) -@inline function __muldiag_nonzeroalpha!(out, D::Diagonal, B::UpperOrLowerTriangular, _add::MulAddMul) - isunit = B isa Union{UnitUpperTriangular, UnitLowerTriangular} - # if both B and out have the same upper/lower triangular structure, - # we may directly read and write from the parents - out_maybeparent, B_maybeparent = _maybe_unwrap_tri(out, B) - for j in axes(B, 2) - if isunit - _modify!(_add, D.diag[j] * B[j,j], out, (j,j)) - end - rowrange = B isa UpperOrUnitUpperTriangular ? (1:min(j-isunit, size(B,1))) : (j+isunit:size(B,1)) - @inbounds @simd for i in rowrange - _modify!(_add, D.diag[i] * B_maybeparent[i,j], out_maybeparent, (i,j)) - end - end - out -end -function __muldiag!(out, D::Diagonal, B, _add::MulAddMul) +function __muldiag!(out, D::Diagonal, B, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} require_one_based_indexing(out, B) alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out, beta) else - __muldiag_nonzeroalpha!(out, D, B, _add) - end - return out -end - -@inline function __muldiag_nonzeroalpha!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - beta = _add.beta - _add_aisone = MulAddMul{true,bis0,Bool,typeof(beta)}(true, beta) - @inbounds for j in axes(A, 2) - dja = _add(D.diag[j]) - @simd for i in axes(A, 1) - _modify!(_add_aisone, A[i,j] * dja, out, (i,j)) - end - end - out -end -@inline function __muldiag_nonzeroalpha!(out, A::UpperOrLowerTriangular, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - isunit = A isa Union{UnitUpperTriangular, UnitLowerTriangular} - beta = _add.beta - # since alpha is multiplied to the diagonal element of D, - # we may skip alpha in the second multiplication by setting ais1 to true - _add_aisone = MulAddMul{true,bis0,Bool,typeof(beta)}(true, beta) - # if both A and out have the same upper/lower triangular structure, - # we may directly read and write from the parents - out_maybeparent, A_maybeparent = _maybe_unwrap_tri(out, A) - @inbounds for j in axes(A, 2) - dja = _add(D.diag[j]) - if isunit - _modify!(_add_aisone, A[j,j] * dja, out, (j,j)) - end - rowrange = A isa UpperOrUnitUpperTriangular ? (1:min(j-isunit, size(A,1))) : (j+isunit:size(A,1)) - @simd for i in rowrange - _modify!(_add_aisone, A_maybeparent[i,j] * dja, out_maybeparent, (i,j)) + if bis0 + @inbounds for j in axes(B, 2) + @simd for i in axes(B, 1) + out[i,j] = D.diag[i] * B[i,j] * alpha + end + end + else + @inbounds for j in axes(B, 2) + @simd for i in axes(B, 1) + out[i,j] = D.diag[i] * B[i,j] * alpha + out[i,j] * beta + end + end end end - out + return out end -function __muldiag!(out, A, D::Diagonal, _add::MulAddMul) +function __muldiag!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} require_one_based_indexing(out, A) alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out, beta) else - __muldiag_nonzeroalpha!(out, A, D, _add) + if bis0 + @inbounds for j in axes(A, 2) + dja = D.diag[j] * alpha + @simd for i in axes(A, 1) + out[i,j] = A[i,j] * dja + end + end + else + @inbounds for j in axes(A, 2) + dja = D.diag[j] * alpha + @simd for i in axes(A, 1) + out[i,j] = A[i,j] * dja + out[i,j] * beta + end + end + end end return out end - -@inline function __muldiag_nonzeroalpha!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul) +function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} d1 = D1.diag d2 = D2.diag - outd = out.diag - @inbounds @simd for i in eachindex(d1, d2, outd) - _modify!(_add, d1[i] * d2[i], outd, i) - end - out -end -function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul) alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out.diag, beta) else - __muldiag_nonzeroalpha!(out, D1, D2, _add) + if bis0 + @inbounds @simd for i in eachindex(out.diag) + out.diag[i] = d1[i] * d2[i] * alpha + end + else + @inbounds @simd for i in eachindex(out.diag) + out.diag[i] = d1[i] * d2[i] * alpha + out.diag[i] * beta + end + end end return out end -@inline function __muldiag_nonzeroalpha!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul) - d1 = D1.diag - d2 = D2.diag - @inbounds @simd for i in eachindex(d1, d2) - _modify!(_add, d1[i] * d2[i], out, (i,i)) - end - out -end -function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1}) where {ais1} +function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} require_one_based_indexing(out) alpha, beta = _add.alpha, _add.beta + mA = size(D1, 1) + d1 = D1.diag + d2 = D2.diag _rmul_or_fill!(out, beta) if !iszero(alpha) - _add_bis1 = MulAddMul{ais1,false,typeof(alpha),Bool}(alpha,true) - __muldiag_nonzeroalpha!(out, D1, D2, _add_bis1) + @inbounds @simd for i in 1:mA + out[i,i] += d1[i] * d2[i] * alpha + end end return out end @@ -696,21 +658,31 @@ for Tri in (:UpperTriangular, :LowerTriangular) @eval $fun(A::$Tri, D::Diagonal) = $Tri($fun(A.data, D)) @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($fun(A.data, D), $f, D.diag)) end - @eval *(A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = - @invoke *(A::AbstractMatrix, D::Diagonal) - @eval *(A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = - @invoke *(A::AbstractMatrix, D::Diagonal) for (fun, f) in zip((:*, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv)) @eval $fun(D::Diagonal, A::$Tri) = $Tri($fun(D, A.data)) @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag)) end - @eval *(D::Diagonal, A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}) = - @invoke *(D::Diagonal, A::AbstractMatrix) - @eval *(D::Diagonal, A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}) = - @invoke *(D::Diagonal, A::AbstractMatrix) # 3-arg ldiv! @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data)) @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag)) + # 3-arg mul! is disambiguated in special.jl + # 5-arg mul! + @eval _mul!(C::$Tri, D::Diagonal, A::$Tri, _add) = $Tri(mul!(C.data, D, A.data, _add.alpha, _add.beta)) + @eval function _mul!(C::$Tri, D::Diagonal, A::$UTri, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + α, β = _add.alpha, _add.beta + iszero(α) && return _rmul_or_fill!(C, β) + diag′ = bis0 ? nothing : diag(C) + data = mul!(C.data, D, A.data, α, β) + $Tri(_setdiag!(data, _add, D.diag, diag′)) + end + @eval _mul!(C::$Tri, A::$Tri, D::Diagonal, _add) = $Tri(mul!(C.data, A.data, D, _add.alpha, _add.beta)) + @eval function _mul!(C::$Tri, A::$UTri, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + α, β = _add.alpha, _add.beta + iszero(α) && return _rmul_or_fill!(C, β) + diag′ = bis0 ? nothing : diag(C) + data = mul!(C.data, A.data, D, α, β) + $Tri(_setdiag!(data, _add, D.diag, diag′)) + end end @inline function kron!(C::AbstractMatrix, A::Diagonal, B::Diagonal) diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index 380a0465028d1..1c3a9dfa676ac 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -1188,7 +1188,7 @@ end @test oneunit(D3) isa typeof(D3) end -@testset "$Tri" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular)) +@testset "AbstractTriangular" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular)) A = randn(4, 4) TriA = Tri(A) UTriA = UTri(A) @@ -1218,44 +1218,6 @@ end @test outTri === mul!(outTri, D, UTriA, 2, 1)::Tri == mul!(out, D, Matrix(UTriA), 2, 1) @test outTri === mul!(outTri, TriA, D, 2, 1)::Tri == mul!(out, Matrix(TriA), D, 2, 1) @test outTri === mul!(outTri, UTriA, D, 2, 1)::Tri == mul!(out, Matrix(UTriA), D, 2, 1) - - # we may write to a Unit triangular if the diagonal is preserved - ID = Diagonal(ones(size(UTriA,2))) - @test mul!(copy(UTriA), UTriA, ID) == UTriA - @test mul!(copy(UTriA), ID, UTriA) == UTriA - - @testset "partly filled parents" begin - M = Matrix{BigFloat}(undef, 2, 2) - M[1,1] = M[2,2] = 3 - isupper = Tri == UpperTriangular - M[1+!isupper, 1+isupper] = 3 - D = Diagonal(1:2) - T = Tri(M) - TA = Array(T) - @test T * D == TA * D - @test D * T == D * TA - @test mul!(copy(T), T, D, 2, 3) == 2T * D + 3T - @test mul!(copy(T), D, T, 2, 3) == 2D * T + 3T - - U = UTri(M) - UA = Array(U) - @test U * D == UA * D - @test D * U == D * UA - @test mul!(copy(T), U, D, 2, 3) == 2 * UA * D + 3TA - @test mul!(copy(T), D, U, 2, 3) == 2 * D * UA + 3TA - - M2 = Matrix{BigFloat}(undef, 2, 2) - M2[1+!isupper, 1+isupper] = 3 - U = UTri(M2) - UA = Array(U) - @test U * D == UA * D - @test D * U == D * UA - ID = Diagonal(ones(size(U,2))) - @test mul!(copy(U), U, ID) == U - @test mul!(copy(U), ID, U) == U - @test mul!(copy(U), U, ID, 2, -1) == U - @test mul!(copy(U), ID, U, 2, -1) == U - end end struct SMatrix1{T} <: AbstractArray{T,2} From 2188ba4d70a349594484c927bc7a6e71edaa5902 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 21 Oct 2024 11:57:10 -0400 Subject: [PATCH 32/47] precompile: add error for using require_stdlib during precompile (#56233) This function could accidentally add a dependency on the stdlib in the user's package, which would make it immediately stale. As pointed out to me by topolarity --- base/loading.jl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index db6a681bb2a5b..190aca66f91ff 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1564,7 +1564,6 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any} end end -precompiling_package::Bool = false loading_extension::Bool = false precompiling_extension::Bool = false function run_extension_callbacks(extid::ExtensionId) @@ -2288,11 +2287,6 @@ For more details regarding code loading, see the manual sections on [modules](@r [parallel computing](@ref code-availability). """ function require(into::Module, mod::Symbol) - if into === Base.__toplevel__ && precompiling_package - # this error type needs to match the error type compilecache throws for non-125 errors. - error("`using/import $mod` outside of a Module detected. Importing a package outside of a module \ - is not allowed during package precompilation.") - end if _require_world_age[] != typemax(UInt) Base.invoke_in_world(_require_world_age[], __require, into, mod) else @@ -2301,6 +2295,10 @@ function require(into::Module, mod::Symbol) end function __require(into::Module, mod::Symbol) + if into === Base.__toplevel__ && generating_output(#=incremental=#true) + error("`using/import $mod` outside of a Module detected. Importing a package outside of a module \ + is not allowed during package precompilation.") + end @lock require_lock begin LOADING_CACHE[] = LoadingCache() try @@ -2709,6 +2707,10 @@ end [2] https://github.com/JuliaLang/StyledStrings.jl/issues/91#issuecomment-2379602914 """ function require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}=nothing) + if generating_output(#=incremental=#true) + # Otherwise this would lead to awkward dependency issues by loading a package that isn't in the Project/Manifest + error("This interactive function requires a stdlib to be loaded, and package code should instead use it directly from that stdlib.") + end @lock require_lock begin # the PkgId of the ext, or package if not an ext this_uuidkey = ext isa String ? PkgId(uuid5(package_uuidkey.uuid, ext), ext) : package_uuidkey @@ -3048,7 +3050,6 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o:: empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated Base.track_nested_precomp($precomp_stack) Base.precompiling_extension = $(loading_extension | isext) - Base.precompiling_package = true Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)), $(repr(load_path)), $deps, $(repr(source_path(nothing)))) """) From f9765410821dce0f2e5ce8a625fbdf9a52f02462 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Mon, 21 Oct 2024 22:26:09 +0530 Subject: [PATCH 33/47] Fix indexing for block triangular matrices (#56168) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assuming that block matrices contain tiled elements (i.e. all elements along a row have the same number of rows and similarly, all elements along a column have the same number of columns), we may generalize `diagzero` to accept arbitrary matrices. We may therefore use only the diagonal elements to generate the structural zeros. This was being assumed anyway in the individual methods. We also now use `diagzero` in indexing triangular matrices, so the following would work correctly: ```julia julia> M = reshape([ones(2,2), fill(2,4,2), fill(3,2,3), fill(4,4,3)],2,2) 2×2 Matrix{Matrix{Float64}}: [1.0 1.0; 1.0 1.0] … [3.0 3.0 3.0; 3.0 3.0 3.0] [2.0 2.0; 2.0 2.0; 2.0 2.0; 2.0 2.0] [4.0 4.0 4.0; 4.0 4.0 4.0; 4.0 4.0 4.0; 4.0 4.0 4.0] julia> U = UpperTriangular(M) 2×2 UpperTriangular{Matrix{Float64}, Matrix{Matrix{Float64}}}: [1.0 1.0; 1.0 1.0] … [3.0 3.0 3.0; 3.0 3.0 3.0] ⋅ [4.0 4.0 4.0; 4.0 4.0 4.0; 4.0 4.0 4.0; 4.0 4.0 4.0] julia> U[2,1] 4×2 Matrix{Float64}: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 julia> U[2,1] == zero(M[2,1]) true ``` This also changes ```julia julia> M = Matrix{Union{Int,Missing}}(missing,4,4) 4×4 Matrix{Union{Missing, Int64}}: missing missing missing missing missing missing missing missing missing missing missing missing missing missing missing missing julia> U = UpperTriangular(M) 4×4 UpperTriangular{Union{Missing, Int64}, Matrix{Union{Missing, Int64}}}: missing missing missing missing ⋅ missing missing missing ⋅ ⋅ missing missing ⋅ ⋅ ⋅ missing julia> U[3,1] # v"1.12.0-DEV.1373" missing ``` to ```julia julia> U[3,1] # this PR 0 ``` --- stdlib/LinearAlgebra/src/bidiag.jl | 11 ----------- stdlib/LinearAlgebra/src/diagonal.jl | 5 +++-- stdlib/LinearAlgebra/src/triangular.jl | 4 ++-- stdlib/LinearAlgebra/test/triangular.jl | 13 +++++++++++++ 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index a34df37153cd2..b38a983296065 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -118,17 +118,6 @@ Bidiagonal(A::Bidiagonal) = A Bidiagonal{T}(A::Bidiagonal{T}) where {T} = A Bidiagonal{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A.dv, A.ev, A.uplo) -function diagzero(A::Bidiagonal{<:AbstractMatrix}, i, j) - Tel = eltype(A) - if i < j && A.uplo == 'U' #= top right zeros =# - return zeroslike(Tel, axes(A.ev[i], 1), axes(A.ev[j-1], 2)) - elseif j < i && A.uplo == 'L' #= bottom left zeros =# - return zeroslike(Tel, axes(A.ev[i-1], 1), axes(A.ev[j], 2)) - else - return zeroslike(Tel, axes(A.dv[i], 1), axes(A.dv[j], 2)) - end -end - _offdiagind(uplo) = uplo == 'U' ? 1 : -1 @inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 6e8ce96259fc1..417bcfa5715b1 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -191,8 +191,9 @@ end Return the appropriate zero element `A[i, j]` corresponding to a banded matrix `A`. """ diagzero(A::AbstractMatrix, i, j) = zero(eltype(A)) -diagzero(D::Diagonal{M}, i, j) where {M<:AbstractMatrix} = - zeroslike(M, axes(D.diag[i], 1), axes(D.diag[j], 2)) +diagzero(A::AbstractMatrix{M}, i, j) where {M<:AbstractMatrix} = + zeroslike(M, axes(A[i,i], 1), axes(A[j,j], 2)) +diagzero(A::AbstractMatrix, inds...) = diagzero(A, to_indices(A, inds)...) # dispatching on the axes permits specializing on the axis types to return something other than an Array zeroslike(M::Type, ax::Vararg{Union{AbstractUnitRange, Integer}}) = zeroslike(M, ax) """ diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl index 83ef221329d33..d6994f4b4dd58 100644 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ b/stdlib/LinearAlgebra/src/triangular.jl @@ -233,7 +233,7 @@ Base.isstored(A::UpperOrLowerTriangular, i::Int, j::Int) = @propagate_inbounds getindex(A::Union{UnitLowerTriangular{T}, UnitUpperTriangular{T}}, i::Int, j::Int) where {T} = _shouldforwardindex(A, i, j) ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) @propagate_inbounds getindex(A::Union{LowerTriangular, UpperTriangular}, i::Int, j::Int) = - _shouldforwardindex(A, i, j) ? A.data[i,j] : _zero(A.data,j,i) + _shouldforwardindex(A, i, j) ? A.data[i,j] : diagzero(A,i,j) _shouldforwardindex(U::UpperTriangular, b::BandIndex) = b.band >= 0 _shouldforwardindex(U::LowerTriangular, b::BandIndex) = b.band <= 0 @@ -245,7 +245,7 @@ Base.@constprop :aggressive @propagate_inbounds function getindex(A::Union{UnitL _shouldforwardindex(A, b) ? A.data[b] : ifelse(b.band == 0, oneunit(T), zero(T)) end Base.@constprop :aggressive @propagate_inbounds function getindex(A::Union{LowerTriangular, UpperTriangular}, b::BandIndex) - _shouldforwardindex(A, b) ? A.data[b] : _zero(A.data, b) + _shouldforwardindex(A, b) ? A.data[b] : diagzero(A.data, b) end _zero_triangular_half_str(::Type{<:UpperOrUnitUpperTriangular}) = "lower" diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl index 2ceda735dfd0a..678827ceac720 100644 --- a/stdlib/LinearAlgebra/test/triangular.jl +++ b/stdlib/LinearAlgebra/test/triangular.jl @@ -1330,6 +1330,19 @@ end end end +@testset "indexing uses diagzero" begin + @testset "block matrix" begin + M = reshape([zeros(2,2), zeros(4,2), zeros(2,3), zeros(4,3)],2,2) + U = UpperTriangular(M) + @test [size(x) for x in U] == [size(x) for x in M] + end + @testset "Union eltype" begin + M = Matrix{Union{Int,Missing}}(missing,4,4) + U = UpperTriangular(M) + @test iszero(U[3,1]) + end +end + @testset "addition/subtraction of mixed triangular" begin for A in (Hermitian(rand(4, 4)), Diagonal(rand(5))) for T in (UpperTriangular, LowerTriangular, From 1ba035da42110313b84e8edf2dccd3aa9a2a5082 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Mon, 21 Oct 2024 15:16:53 -0400 Subject: [PATCH 34/47] trimming: don't abort where we used to resolve dynamic calls (#56271) This call resolution code was deleted in #56179 (rightfully so), but it should be a no-op until we implement this in inference. --- src/codegen.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index eaa3cc8176ad5..b0d5038024900 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5806,7 +5806,8 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo } int failed_dispatch = !argv[0].constant; if (ctx.params->trim != JL_TRIM_NO) { - abort(); // this code path is unsound, unsafe, and probably bad + // TODO: Implement the last-minute call resolution that used to be here + // in inference instead. } if (failed_dispatch && trim_may_error(ctx.params->trim)) { From 08d11d041b22fe90380e56be4fb4d44aaf46ec85 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 22 Oct 2024 05:23:04 +0900 Subject: [PATCH 35/47] inference: fix inference error from constructing invalid `TypeVar` (#56264) - fixes JuliaLang/julia#56248 --- base/compiler/tfuncs.jl | 12 ++++++++++-- test/compiler/inference.jl | 8 ++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index a6b7e53c6f320..450cfdcfadf82 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -601,8 +601,16 @@ add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVec return TypeVar end end - tv = TypeVar(nval, lb, ub) - return PartialTypeVar(tv, lb_certain, ub_certain) + lb_valid = lb isa Type || lb isa TypeVar + ub_valid = ub isa Type || ub isa TypeVar + if lb_valid && ub_valid + tv = TypeVar(nval, lb, ub) + return PartialTypeVar(tv, lb_certain, ub_certain) + elseif !lb_valid && lb_certain + return Union{} + elseif !ub_valid && ub_certain + return Union{} + end end return TypeVar end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 71f9da04baa4a..dd62e329962c6 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -6055,3 +6055,11 @@ f55916(::Vararg{T,T}) where {T} = "2" g55916(x) = f55916(x) # this shouldn't error @test only(code_typed(g55916, (Any,); optimize=false))[2] == Int + +# JuliaLang/julia#56248 +@test Base.infer_return_type() do + TypeVar(:Issue56248, 1) +end === Union{} +@test Base.infer_return_type() do + TypeVar(:Issue56248, Any, 1) +end === Union{} From 36593fdd3a4e8112798059bb2310a39bbfcf96ed Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 21 Oct 2024 22:41:34 -0400 Subject: [PATCH 36/47] add Pkg 1.11 news to HISTORY.md (#56277) Backport already on https://github.com/JuliaLang/julia/pull/56228 --- HISTORY.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index aa7f9f0ccdad6..c3ca212453d07 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -136,6 +136,14 @@ Standard library changes `AnnotatedString` with various faces or other attributes applied ([#49586]). #### Package Manager +* It is now possible to specify "sources" for packages in a `[sources]` section in Project.toml. + This can be used to add non-registered normal or test dependencies. +* Pkg now obeys `[compat]` bounds for `julia` and raises an error if the version of the running Julia binary is incompatible with the bounds in `Project.toml`. + Pkg has always obeyed this compat when working with Registry packages. This change affects mostly local packages +* `pkg> add` and `Pkg.add` will now add compat entries for new direct dependencies if the active environment is a + package (has a `name` and `uuid` entry). +* Dependencies can now be directly added as weak deps or extras via the `pkg> add --weak/extra Foo` or + `Pkg.add("Foo", target=:weakdeps/:extras)` forms. #### LinearAlgebra * `cbrt(::AbstractMatrix{<:Real})` is now defined and returns real-valued matrix cube roots of real-valued matrices ([#50661]). From 31f7df648f750897e245d169639cb1264ebc7404 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 22 Oct 2024 07:36:17 +0200 Subject: [PATCH 37/47] Remove NewPM pass exports. (#56269) All ecosystem consumers have switched to the string-based API. --- src/codegen-stubs.c | 16 ---------------- src/jl_exported_funcs.inc | 15 --------------- src/llvm-julia-passes.inc | 30 +++++++++++++++--------------- src/llvm_api.cpp | 32 -------------------------------- src/pipeline.cpp | 22 +++++++++++----------- 5 files changed, 26 insertions(+), 89 deletions(-) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 7ddb68fd6b036..98ac063ba36d6 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -110,22 +110,6 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr) JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { } -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraMPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraCGPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraFPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraLPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE - -#include "llvm-julia-passes.inc" - -#undef MODULE_PASS -#undef CGSCC_PASS -#undef FUNCTION_PASS -#undef LOOP_PASS - //LLVM C api to the julia JIT JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index f712f154ed896..71a78b1c20fc7 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -547,21 +547,6 @@ YY(jl_getUnwindInfo) \ YY(jl_get_libllvm) \ YY(jl_register_passbuilder_callbacks) \ - YY(LLVMExtraMPMAddCPUFeaturesPass) \ - YY(LLVMExtraMPMAddRemoveNIPass) \ - YY(LLVMExtraMPMAddMultiVersioningPass) \ - YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ - YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ - YY(LLVMExtraMPMAddLowerPTLSPass) \ - YY(LLVMExtraFPMAddDemoteFloat16Pass) \ - YY(LLVMExtraFPMAddLateLowerGCPass) \ - YY(LLVMExtraFPMAddAllocOptPass) \ - YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \ - YY(LLVMExtraFPMAddLowerExcHandlersPass) \ - YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ - YY(LLVMExtraFPMAddFinalLowerGCPass) \ - YY(LLVMExtraLPMAddJuliaLICMPass) \ - YY(LLVMExtraLPMAddLowerSIMDLoopPass) \ YY(JLJITGetLLVMOrcExecutionSession) \ YY(JLJITGetJuliaOJIT) \ YY(JLJITGetExternalJITDylib) \ diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc index c41ecbba87b6a..523c9fbcd3402 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -1,26 +1,26 @@ //Module passes #ifdef MODULE_PASS -MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) -MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) -MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) -MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) -MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) -MODULE_PASS("LowerPTLSPass", LowerPTLSPass, LowerPTLSPass()) +MODULE_PASS("CPUFeatures", CPUFeaturesPass()) +MODULE_PASS("RemoveNI", RemoveNIPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) #endif //Function passes #ifdef FUNCTION_PASS -FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass, DemoteFloat16Pass()) -FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass, LateLowerGCPass()) -FUNCTION_PASS("AllocOpt", AllocOptPass, AllocOptPass()) -FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass, PropagateJuliaAddrspacesPass()) -FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass, LowerExcHandlersPass()) -FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifierPass()) -FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) +FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass()) +FUNCTION_PASS("AllocOpt", AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) +FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass()) #endif //Loop passes #ifdef LOOP_PASS -LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) -LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) +LOOP_PASS("JuliaLICM", JuliaLICMPass()) +LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass()) #endif diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp index e98c375b711b3..8c48b5661f984 100644 --- a/src/llvm_api.cpp +++ b/src/llvm_api.cpp @@ -10,7 +10,6 @@ #endif #include "jitlayers.h" -#include "passes.h" #include #include @@ -58,14 +57,6 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility, LLVMOrcMaterializationResponsibilityRef) -typedef struct LLVMOpaqueModulePassManager *LLVMModulePassManagerRef; -typedef struct LLVMOpaqueFunctionPassManager *LLVMFunctionPassManagerRef; -typedef struct LLVMOpaqueLoopPassManager *LLVMLoopPassManagerRef; - -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::ModulePassManager, LLVMModulePassManagerRef) -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::FunctionPassManager, LLVMFunctionPassManagerRef) -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::LoopPassManager, LLVMLoopPassManagerRef) - extern "C" { JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void) @@ -150,27 +141,4 @@ JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT) return wrap(&unwrap(JIT)->getIRCompileLayer()); } -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraMPMAdd##CLASS##_impl(LLVMModulePassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraFPMAdd##CLASS##_impl(LLVMFunctionPassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraLPMAdd##CLASS##_impl(LLVMLoopPassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } - -#include "llvm-julia-passes.inc" - -#undef MODULE_PASS -#undef CGSCC_PASS -#undef FUNCTION_PASS -#undef LOOP_PASS - } // extern "C" diff --git a/src/pipeline.cpp b/src/pipeline.cpp index f8935070bb001..f8976099ee53c 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -617,29 +617,29 @@ namespace { void adjustPIC(PassInstrumentationCallbacks &PIC) JL_NOTSAFEPOINT { //Borrowed from LLVM PassBuilder.cpp:386 -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ +#define MODULE_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define MODULE_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ +#define FUNCTION_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ +#define LOOP_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ +#define CGSCC_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); @@ -899,7 +899,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef FUNCTION_PASS if (Name.consume_front("GCInvariantVerifier")) { @@ -921,7 +921,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &PM, ArrayRef InnerPipeline) { -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef MODULE_PASS if (Name.consume_front("LowerPTLSPass")) { @@ -964,7 +964,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, LoopPassManager &PM, ArrayRef InnerPipeline) { -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define LOOP_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef LOOP_PASS return false; From 7d4b2b78e66045a7249a96d195621f2b7d20c1fd Mon Sep 17 00:00:00 2001 From: Benjamin Lorenz Date: Tue, 22 Oct 2024 15:27:42 +0200 Subject: [PATCH 38/47] jitlayers: use std::make_tuple instead of tuple constructor (#56287) this should be safer for the type deduction and fixes a build error for macos on Yggdrasil (https://github.com/JuliaPackaging/Yggdrasil/pull/9660): ``` src/jitlayers.cpp:665:54: error: no viable constructor or deduction guide for deduction of template arguments of 'tuple' 665 | incompletemodules.insert(std::pair(codeinst, std::tuple(std::move(params), waiting))); ``` The Yggdrasil environment is a bit special with a rather new clang (version 17) but an old macos sdk and I don't know exactly in which circumstances this triggers. But I think `std::make_tuple` should be more reliable when the tuple types are not specified. cc: @fingolfin --- src/jitlayers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 8b8004af03616..c8d8356687dcf 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -662,7 +662,7 @@ static void jl_emit_codeinst_to_jit( int waiting = jl_analyze_workqueue(codeinst, params); if (waiting) { auto release = std::move(params.tsctx_lock); // unlock again before moving from it - incompletemodules.insert(std::pair(codeinst, std::tuple(std::move(params), waiting))); + incompletemodules.insert(std::pair(codeinst, std::make_tuple(std::move(params), waiting))); } else { finish_params(result_m.getModuleUnlocked(), params); From ab22f982427184b0a50ba407e4f1cbedbc862ced Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 22 Oct 2024 09:49:42 -0400 Subject: [PATCH 39/47] move time_imports and trace_* macros to Base but remain owned by InteractiveUtils (#56276) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way all packages can be timed including InteractiveUtils and its deps (Base64, JuliaSyntaxHighlighting, Markdown, StyledStrings). With this PR ``` % ./julia --start=no -e "@time Base.@time_imports using REPL" 41.8 ms StyledStrings ┌ 0.1 ms JuliaSyntaxHighlighting.__init__() 14.2 ms JuliaSyntaxHighlighting 1.0 ms Base64 ┌ 0.0 ms Markdown.__init__() 9.6 ms Markdown 2.2 ms InteractiveUtils 0.3 ms Unicode ┌ 0.0 ms REPL.REPLCompletions.__init__() ├ 0.0 ms REPL.__init__() 95.7 ms REPL 0.225907 seconds (290.95 k allocations: 16.761 MiB) ``` Otherwise ``` % ./julia --start=no -e "using InteractiveUtils; @time @time_imports using REPL" 0.5 ms Unicode ┌ 0.0 ms REPL.REPLCompletions.__init__() ├ 0.1 ms REPL.__init__() 107.5 ms REPL 0.127016 seconds (164.18 k allocations: 9.199 MiB) ``` Also the `@trace_compile` and `@trace_dispatch` macros for the same reason. --- base/timing.jl | 35 +++++++++++++++++++++++++ stdlib/InteractiveUtils/src/macros.jl | 37 +++------------------------ 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index 4880951f0a32d..1de3727756829 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -628,3 +628,38 @@ macro timed(ex) ) end end + +# Exported, documented, and tested in InteractiveUtils +# here so it's possible to time/trace all imports, including InteractiveUtils and its deps +macro time_imports(ex) + quote + try + Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1) + $(esc(ex)) + finally + Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1) + end + end +end + +macro trace_compile(ex) + quote + try + ccall(:jl_force_trace_compile_timing_enable, Cvoid, ()) + $(esc(ex)) + finally + ccall(:jl_force_trace_compile_timing_disable, Cvoid, ()) + end + end +end + +macro trace_dispatch(ex) + quote + try + ccall(:jl_force_trace_dispatch_enable, Cvoid, ()) + $(esc(ex)) + finally + ccall(:jl_force_trace_dispatch_disable, Cvoid, ()) + end + end +end diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl index 211687df47954..e338d8626fb0f 100644 --- a/stdlib/InteractiveUtils/src/macros.jl +++ b/stdlib/InteractiveUtils/src/macros.jl @@ -4,6 +4,10 @@ import Base: typesof, insert!, replace_ref_begin_end!, infer_effects +# defined in Base so it's possible to time all imports, including InteractiveUtils and its deps +# via. `Base.@time_imports` etc. +import Base: @time_imports, @trace_compile, @trace_dispatch + separate_kwargs(args...; kwargs...) = (args, values(kwargs)) """ @@ -245,39 +249,6 @@ macro code_lowered(ex0...) end end -macro time_imports(ex) - quote - try - Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1) - $(esc(ex)) - finally - Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1) - end - end -end - -macro trace_compile(ex) - quote - try - ccall(:jl_force_trace_compile_timing_enable, Cvoid, ()) - $(esc(ex)) - finally - ccall(:jl_force_trace_compile_timing_disable, Cvoid, ()) - end - end -end - -macro trace_dispatch(ex) - quote - try - ccall(:jl_force_trace_dispatch_enable, Cvoid, ()) - $(esc(ex)) - finally - ccall(:jl_force_trace_dispatch_disable, Cvoid, ()) - end - end -end - """ @functionloc From 6de6b46b7e5f5438c04ced8510296e0a63507264 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Tue, 22 Oct 2024 12:07:54 -0400 Subject: [PATCH 40/47] lowering: split `finally` blocks for exceptional control-flow (#55876) This change duplicates `finally` blocks in lowered IR, so that they can have a static nesting depth in the `try-catch` hierarchy. Previously, `finally` control-flow looked like this: ``` error non-error \ / \ / | finally block | / \ / \ error non-error ``` This kind of flow is a problem, because in a couple places the compiler assumes that it can actually "color" the CFG such that there is a static nesting depth at each BasicBlock (i.e. each BasicBlock can be labeled w/ a unique enclosing `try` / `catch` scope). The above `finally` pattern violates that assumption. In an upcoming PR, I want to extend the lifetimes of our Event Handlers (`jl_handler_t`) until the end of a `catch` block (rather than the start) which noticeably breaks `llvm-lower-handlers.cpp`. (@keno was very clear about this assumption in the comments for that pass.) Behaviorally this was _mostly_ benign, except for some mis-handling of an erroring entry that turns into a non-erroring exit. That could be fixed by banning `break` and `return` within `finally` blocks or making the lowering more complicated, but this PR instead splits the `finally` block into an erroring and non-erroring path so that we can attach the `catch` handler appropriately. --- src/julia-syntax.scm | 10 +++++++--- test/exceptions.jl | 12 ++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 4b3e6ae96898b..b48cb48bf0b79 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4854,10 +4854,14 @@ f(x) = yt(x) ;; separate trycatch and tryfinally blocks earlier. (mark-label catch) (if finally - (begin (enter-finally-block catchcode #f) ;; enter block via exception + (begin (set! finally-handler last-finally-handler) + (set! catch-token-stack (cons handler-token catch-token-stack)) + (compile (caddr e) break-labels #f #f) ;; enter block via exception + (emit '(call (top rethrow))) + (emit-return tail '(null)) ; unreachable + (set! catch-token-stack (cdr catch-token-stack)) (mark-label endl) ;; non-exceptional control flow enters here - (set! finally-handler last-finally-handler) - (compile (caddr e) break-labels #f #f) + (compile (renumber-assigned-ssavalues (caddr e)) break-labels #f #f) ;; emit actions to be taken at exit of finally ;; block, depending on the tag variable `finally` (let loop ((actions (caddr my-finally-handler))) diff --git a/test/exceptions.jl b/test/exceptions.jl index eb0bbaec35090..1e52c7a2fe2c3 100644 --- a/test/exceptions.jl +++ b/test/exceptions.jl @@ -241,6 +241,18 @@ end end end)() @test length(Base.current_exceptions()) == 0 + + (()-> begin + while true + try + error("foo") + finally + break + end + end + @test length(Base.current_exceptions()) == 0 + end)() + @test length(Base.current_exceptions()) == 0 end @testset "Deep exception stacks" begin From e4101b71dbcd766b2e4f162320d1d64c0f03c6f3 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 22 Oct 2024 12:26:07 -0400 Subject: [PATCH 41/47] add recompile comment in --trace-compile in terminal color mode too (#56275) Update: Just adds the comment in color terminal mode too --- I didn't think adding the `# recompile` text to the end in the repl was a good idea as it increases likelihood of text wrapping. And the color should be sufficient for local review, but when people copy from a color terminal we lose the recompile info. So this just adds a zero-length change indicator, for people to look out for. --- src/gf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gf.c b/src/gf.c index e77c950c38ae4..285942cd157c5 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2566,12 +2566,10 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila jl_static_show(s_precompile, mi->specTypes); jl_printf(s_precompile, ")"); if (is_recompile) { + jl_printf(s_precompile, " # recompile"); if (s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) { jl_printf(s_precompile, "\e[0m"); } - else { - jl_printf(s_precompile, " # recompile"); - } } jl_printf(s_precompile, "\n"); if (s_precompile != JL_STDERR) From 7c1935d5bf2a7b13008a9494f626febbccd8bf7c Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 22 Oct 2024 17:32:46 -0400 Subject: [PATCH 42/47] Some usability follow-ups and fixes for the world macro (#56273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Allow fully qualified module names: `@world(Foo.Bar.baz, 1)` 2. Correct the printing order of the world macro and module qualifier 3. Add pretty printing for Binding(Partition). Example of the test: ``` julia> GlobalRef(Rebinding, :Foo).binding Binding Main.Rebinding.Foo 27497:∞ - undefined binding - guard entry 0:27496 - constant binding to @world(Main.Rebinding.Foo, 0:27496) ``` --------- Co-authored-by: Shuhei Kadowaki --- base/essentials.jl | 11 +++++--- base/range.jl | 6 ++--- base/show.jl | 63 ++++++++++++++++++++++++++++++++++++++++++++-- test/rebinding.jl | 6 +++++ 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/base/essentials.jl b/base/essentials.jl index a07aaa6769ed2..750ee0f9c434c 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -1258,8 +1258,8 @@ arbitrary code in fixed worlds. `world` may be `UnitRange`, in which case the ma will error unless the binding is valid and has the same value across the entire world range. -The `@world` macro is primarily used in the priniting of bindings that are no longer available -in the current world. +The `@world` macro is primarily used in the printing of bindings that are no longer +available in the current world. ## Example ``` @@ -1283,9 +1283,12 @@ julia> fold """ macro world(sym, world) if isa(sym, Symbol) - return :($(_resolve_in_world)($world, $(QuoteNode(GlobalRef(__module__, sym))))) + return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(GlobalRef(__module__, sym))))) elseif isa(sym, GlobalRef) - return :($(_resolve_in_world)($world, $(QuoteNode(sym)))) + return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(sym)))) + elseif isa(sym, Expr) && sym.head === :(.) && + length(sym.args) == 2 && isa(sym.args[2], QuoteNode) && isa(sym.args[2].value, Symbol) + return :($(_resolve_in_world)($(esc(world)), $(GlobalRef)($(esc(sym.args[1])), $(sym.args[2])))) else error("`@world` requires a symbol or GlobalRef") end diff --git a/base/range.jl b/base/range.jl index 3301335785878..cee15db39b911 100644 --- a/base/range.jl +++ b/base/range.jl @@ -1685,9 +1685,9 @@ end # The rest of this is defined in essentials.jl, but UnitRange is not available function _resolve_in_world(worlds::UnitRange, gr::GlobalRef) # Validate that this binding's reference covers the entire world range - bpart = lookup_binding_partition(first(worlds), gr) - if bpart.max_world < last(world) + bpart = lookup_binding_partition(UInt(first(worlds)), gr) + if bpart.max_world < last(worlds) error("Binding does not cover the full world range") end - _resolve_in_world(last(world), gr) + _resolve_in_world(UInt(last(worlds)), gr) end diff --git a/base/show.jl b/base/show.jl index 3aeb267b4a696..93f0a58b7cdd6 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1061,6 +1061,8 @@ function show_type_name(io::IO, tn::Core.TypeName) sym = (globfunc ? globname : tn.name)::Symbol globfunc && print(io, "typeof(") quo = false + world = check_world_bounded(tn) + world !== nothing && print(io, "@world(") if !(get(io, :compact, false)::Bool) # Print module prefix unless type is visible from module passed to # IOContext If :module is not set, default to Main. @@ -1078,8 +1080,6 @@ function show_type_name(io::IO, tn::Core.TypeName) end end end - world = check_world_bounded(tn) - world !== nothing && print(io, "@world(") show_sym(io, sym) world !== nothing && print(io, ", ", world, ")") quo && print(io, ")") @@ -3359,3 +3359,62 @@ end function show(io::IO, ::MIME"text/plain", oc::Core.OpaqueClosure{A, R}) where {A, R} show(io, oc) end + +# printing bindings and partitions +function print_partition(io::IO, partition::Core.BindingPartition) + print(io, partition.min_world) + print(io, ":") + max_world = @atomic partition.max_world + if max_world == typemax(UInt) + print(io, '∞') + else + print(io, max_world) + end + print(io, " - ") + kind = binding_kind(partition) + if is_some_const_binding(kind) + print(io, "constant binding to ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_GUARD + print(io, "undefined binding - guard entry") + elseif kind == BINDING_KIND_FAILED + print(io, "ambiguous binding - guard entry") + elseif kind == BINDING_KIND_DECLARED + print(io, "undefined, but declared using `global` - guard entry") + elseif kind == BINDING_KIND_IMPLICIT + print(io, "implicit `using` from ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_EXPLICIT + print(io, "explicit `using` from ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_IMPORTED + print(io, "explicit `import` from ") + print(io, partition_restriction(partition)) + else + @assert kind == BINDING_KIND_GLOBAL + print(io, "global variable with type ") + print(io, partition_restriction(partition)) + end +end + +function show(io::IO, ::MIME"text/plain", partition::Core.BindingPartition) + print(io, "BindingPartition ") + print_partition(io, partition) +end + +function show(io::IO, ::MIME"text/plain", bnd::Core.Binding) + print(io, "Binding ") + print(io, bnd.globalref) + if !isdefined(bnd, :partitions) + print(io, "No partitions") + else + partition = @atomic bnd.partitions + while true + println(io) + print(io, " ") + print_partition(io, partition) + isdefined(partition, :next) || break + partition = @atomic partition.next + end + end +end diff --git a/test/rebinding.jl b/test/rebinding.jl index 4066d91bc4b9b..564be70e44913 100644 --- a/test/rebinding.jl +++ b/test/rebinding.jl @@ -7,6 +7,7 @@ module Rebinding struct Foo x::Int end + const defined_world_age = Base.tls_world_age() x = Foo(1) @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_CONST @@ -15,4 +16,9 @@ module Rebinding @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_GUARD @test contains(repr(x), "@world") + + # Tests for @world syntax + @test Base.@world(Foo, defined_world_age) == typeof(x) + @test Base.@world(Rebinding.Foo, defined_world_age) == typeof(x) + @test Base.@world((@__MODULE__).Foo, defined_world_age) == typeof(x) end From 049d92a2ac506316ca2413e103647f72ce847b56 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 22 Oct 2024 18:14:39 -0400 Subject: [PATCH 43/47] REPL: Don't search for ?( completions when hinting (#56278) --- stdlib/REPL/src/REPLCompletions.jl | 50 ++++++++++++++++-------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 42480aea91605..3188a6ca42a12 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -1225,33 +1225,35 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif partial = string[1:pos] inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false)) - # ?(x, y)TAB lists methods you can call with these objects - # ?(x, y TAB lists methods that take these objects as the first two arguments - # MyModule.?(x, y)TAB restricts the search to names in MyModule - rexm = match(r"(\w+\.|)\?\((.*)$", partial) - if rexm !== nothing - # Get the module scope - if isempty(rexm.captures[1]) - callee_module = context_module - else - modname = Symbol(rexm.captures[1][1:end-1]) - if isdefined(context_module, modname) - callee_module = getfield(context_module, modname) - if !isa(callee_module, Module) + if !hint # require a tab press for completion of these + # ?(x, y)TAB lists methods you can call with these objects + # ?(x, y TAB lists methods that take these objects as the first two arguments + # MyModule.?(x, y)TAB restricts the search to names in MyModule + rexm = match(r"(\w+\.|)\?\((.*)$", partial) + if rexm !== nothing + # Get the module scope + if isempty(rexm.captures[1]) + callee_module = context_module + else + modname = Symbol(rexm.captures[1][1:end-1]) + if isdefined(context_module, modname) + callee_module = getfield(context_module, modname) + if !isa(callee_module, Module) + callee_module = context_module + end + else callee_module = context_module end - else - callee_module = context_module end - end - moreargs = !endswith(rexm.captures[2], ')') - callstr = "_(" * rexm.captures[2] - if moreargs - callstr *= ')' - end - ex_org = Meta.parse(callstr, raise=false, depwarn=false) - if isa(ex_org, Expr) - return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false + moreargs = !endswith(rexm.captures[2], ')') + callstr = "_(" * rexm.captures[2] + if moreargs + callstr *= ')' + end + ex_org = Meta.parse(callstr, raise=false, depwarn=false) + if isa(ex_org, Expr) + return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false + end end end From 73b85cfc04d83cb4b630dbd36ad2c270cf548330 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 22 Oct 2024 21:12:50 -0400 Subject: [PATCH 44/47] Use a curried helper for module-local eval/include (#55949) In https://github.com/JuliaLang/julia/pull/55466, the automatically added `include` method for non-bare modules was adjusted to conform the signature to the version of those methods in Main (defined in sysimg.jl, since `Main` is technically a bare module). Unfortunately, this broke some downstream packages which overload Base.include with additional functionality and got broken by the additional type restriction. The motivation in https://github.com/JuliaLang/julia/pull/55466 was to give a slightly nicer MethodError. While I don't think this is per-se a particularly strong justification, I do agree that it's awkward for the `Main` version of these functions to have (marginally) different behavior than the version of these functions that gets introduced automatically in new modules (Which has been the case ever since [1], which added the AbstractString restriction in `Main`, but not in the auto-generated versions). This is particularly true, because we use the `Main` version to document the auto-introduction of these methods, which has regularly been a point of confusion. This PR tries to address this problem once and for all, but just not generating special methods into every new module. Instead, there are curried helpers for eval and include in Core and Base (respectively), which can be added to a module simply by doing `const include = IncludeInto(MyModule)` (and similarly for `eval`). As before, this happens automatically for non-bare modules. It thus conforms the behavior of the `Main` version of these functions and the auto-generated versions by construction. Additionally, it saves us having to generate all the additional code/types/objects, etc associated with having extra generic functions in each new module. The impact of this isn't huge, because there aren't that many modules, but it feels conceptually nicer. There is a little bit of extra work in this PR because we have special snowflake backtrace printing code for the `include` machinery, which needs adjusting, but other than that the change is straightforward. [1] https://github.com/JuliaLang/julia/commit/957848b899c7b5389af34cf815aa7bd2b6e2bf82 --------- Co-authored-by: Jameson Nash --- base/Base.jl | 14 ++++++++++++-- base/boot.jl | 8 ++++++-- base/docs/basedocs.jl | 2 +- base/errorshow.jl | 5 ++++- base/loading.jl | 8 ++++---- base/show.jl | 19 +++++++++++++++++++ base/sysimg.jl | 8 ++------ src/jlfrontend.scm | 22 ---------------------- src/toplevel.c | 14 ++++++++++---- test/docs.jl | 2 +- test/reflection.jl | 4 ++-- 11 files changed, 61 insertions(+), 45 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 5fb764bd4cc01..bfac74e5d7bab 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -25,6 +25,11 @@ function include(mod::Module, path::String) end include(path::String) = include(Base, path) +struct IncludeInto <: Function + m::Module +end +(this::IncludeInto)(fname::AbstractString) = include(this.m, fname) + # from now on, this is now a top-module for resolving syntax const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module) @@ -572,6 +577,9 @@ include("precompilation.jl") for m in methods(include) delete_method(m) end +for m in methods(IncludeInto(Base)) + delete_method(m) +end # This method is here only to be overwritten during the test suite to test # various sysimg related invalidation scenarios. @@ -579,8 +587,10 @@ a_method_to_overwrite_in_test() = inferencebarrier(1) # These functions are duplicated in client.jl/include(::String) for # nicer stacktraces. Modifications here have to be backported there -include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) -include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +@noinline include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) +@noinline include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +(this::IncludeInto)(fname::AbstractString) = include(identity, this.m, fname) +(this::IncludeInto)(mapexpr::Function, fname::AbstractString) = include(mapexpr, this.m, fname) # External libraries vendored into Base Core.println("JuliaSyntax/src/JuliaSyntax.jl") diff --git a/base/boot.jl b/base/boot.jl index 861c83a2edac5..ed3e22391f215 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -454,9 +454,13 @@ Nothing() = nothing # This should always be inlined getptls() = ccall(:jl_get_ptls_states, Ptr{Cvoid}, ()) -include(m::Module, fname::String) = ccall(:jl_load_, Any, (Any, Any), m, fname) +include(m::Module, fname::String) = (@noinline; ccall(:jl_load_, Any, (Any, Any), m, fname)) +eval(m::Module, @nospecialize(e)) = (@noinline; ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e)) -eval(m::Module, @nospecialize(e)) = ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e) +struct EvalInto <: Function + m::Module +end +(this::EvalInto)(@nospecialize(e)) = eval(this.m, e) mutable struct Box contents::Any diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 0d5d5ac00e8d0..b080bf51e5e98 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -2580,7 +2580,7 @@ cases. See also [`setproperty!`](@ref Base.setproperty!) and [`getglobal`](@ref) # Examples -```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*" +```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*\\n.*)*" julia> module M; global a; end; julia> M.a # same as `getglobal(M, :a)` diff --git a/base/errorshow.jl b/base/errorshow.jl index 20bdee1de6ec0..7225a024f529e 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -850,7 +850,10 @@ function _simplify_include_frames(trace) for i in length(trace):-1:1 frame::StackFrame, _ = trace[i] mod = parentmodule(frame) - if first_ignored === nothing + if mod === Base && frame.func === :IncludeInto || + mod === Core && frame.func === :EvalInto + kept_frames[i] = false + elseif first_ignored === nothing if mod === Base && frame.func === :_include # Hide include() machinery by default first_ignored = i diff --git a/base/loading.jl b/base/loading.jl index dfcf22b2b0751..69bb332193519 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2890,12 +2890,12 @@ julia> rm("testfile.jl") ``` """ function evalfile(path::AbstractString, args::Vector{String}=String[]) - return Core.eval(Module(:__anon__), + m = Module(:__anon__) + return Core.eval(m, Expr(:toplevel, :(const ARGS = $args), - :(eval(x) = $(Expr(:core, :eval))(__anon__, x)), - :(include(x::AbstractString) = $(Expr(:top, :include))(__anon__, x)), - :(include(mapexpr::Function, x::AbstractString) = $(Expr(:top, :include))(mapexpr, __anon__, x)), + :(const include = $(Base.IncludeInto(m))), + :(const eval = $(Core.EvalInto(m))), :(include($path)))) end evalfile(path::AbstractString, args::Vector) = evalfile(path, String[args...]) diff --git a/base/show.jl b/base/show.jl index 93f0a58b7cdd6..ee467ae90ff50 100644 --- a/base/show.jl +++ b/base/show.jl @@ -3418,3 +3418,22 @@ function show(io::IO, ::MIME"text/plain", bnd::Core.Binding) end end end + +# Special pretty printing for EvalInto/IncludeInto +function show(io::IO, ii::IncludeInto) + if getglobal(ii.m, :include) === ii + print(io, ii.m) + print(io, ".include") + else + show_default(io, ii) + end +end + +function show(io::IO, ei::Core.EvalInto) + if getglobal(ei.m, :eval) === ei + print(io, ei.m) + print(io, ".eval") + else + show_default(io, ei) + end +end diff --git a/base/sysimg.jl b/base/sysimg.jl index 966ed76751f28..ccc8ef38e81bc 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -32,11 +32,7 @@ Use [`Base.include`](@ref) to evaluate a file into another module. !!! compat "Julia 1.5" Julia 1.5 is required for passing the `mapexpr` argument. """ -include(mapexpr::Function, fname::AbstractString) = Base._include(mapexpr, Main, fname) -function include(fname::AbstractString) - isa(fname, String) || (fname = Base.convert(String, fname)::String) - Base._include(identity, Main, fname) -end +const include = Base.IncludeInto(Main) """ eval(expr) @@ -45,7 +41,7 @@ Evaluate an expression in the global scope of the containing module. Every `Module` (except those defined with `baremodule`) has its own 1-argument definition of `eval`, which evaluates expressions in that module. """ -eval(x) = Core.eval(Main, x) +const eval = Core.EvalInto(Main) # Ensure this file is also tracked pushfirst!(Base._included_files, (@__MODULE__, abspath(@__FILE__))) diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index 463e39c41d00a..808af18ebfdbd 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -199,28 +199,6 @@ (error-wrap (lambda () (julia-expand-macroscope expr)))) -;; construct default definitions of `eval` for non-bare modules -;; called by jl_eval_module_expr -(define (module-default-defs name file line) - (jl-expand-to-thunk - (let* ((loc (if (and (eq? file 'none) (eq? line 0)) '() `((line ,line ,file)))) - (x (if (eq? name 'x) 'y 'x)) - (mex (if (eq? name 'mapexpr) 'map_expr 'mapexpr))) - `(block - (= (call eval ,x) - (block - ,@loc - (call (core eval) ,name ,x))) - (= (call include (:: ,x (top AbstractString))) - (block - ,@loc - (call (core _call_latest) (top include) ,name ,x))) - (= (call include (:: ,mex (top Function)) (:: ,x (top AbstractString))) - (block - ,@loc - (call (core _call_latest) (top include) ,mex ,name ,x))))) - file line)) - ; run whole frontend on a string. useful for testing. (define (fe str) (expand-toplevel-expr (julia-parse str) 'none 0)) diff --git a/src/toplevel.c b/src/toplevel.c index 6f2e0cf77568a..c2fbc38d067eb 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -206,11 +206,17 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex if (std_imports) { if (jl_base_module != NULL) { jl_add_standard_imports(newm); + jl_datatype_t *include_into = (jl_datatype_t *)jl_get_global(jl_base_module, jl_symbol("IncludeInto")); + if (include_into) { + form = jl_new_struct(include_into, newm); + jl_set_const(newm, jl_symbol("include"), form); + } + } + jl_datatype_t *eval_into = (jl_datatype_t *)jl_get_global(jl_core_module, jl_symbol("EvalInto")); + if (eval_into) { + form = jl_new_struct(eval_into, newm); + jl_set_const(newm, jl_symbol("eval"), form); } - // add `eval` function - form = jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno); - jl_toplevel_eval_flex(newm, form, 0, 1, &filename, &lineno); - form = NULL; } newm->file = jl_symbol(filename); diff --git a/test/docs.jl b/test/docs.jl index 92d45fe05e397..8db9db30b8463 100644 --- a/test/docs.jl +++ b/test/docs.jl @@ -101,7 +101,7 @@ end @test Docs.undocumented_names(_ModuleWithUndocumentedNames) == [Symbol("@foo"), :f, :⨳] @test isempty(Docs.undocumented_names(_ModuleWithSomeDocumentedNames)) -@test Docs.undocumented_names(_ModuleWithSomeDocumentedNames; private=true) == [:eval, :g, :include] +@test Docs.undocumented_names(_ModuleWithSomeDocumentedNames; private=true) == [:g] # issue #11548 diff --git a/test/reflection.jl b/test/reflection.jl index 634390e0680d1..8c701acb9c09d 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -179,7 +179,7 @@ let @test Base.binding_module(TestMod7648.TestModSub9475, :b9475) == TestMod7648.TestModSub9475 defaultset = Set(Symbol[:Foo7648, :TestMod7648, :a9475, :c7648, :f9475, :foo7648, :foo7648_nomethods]) allset = defaultset ∪ Set(Symbol[ - Symbol("#eval"), Symbol("#foo7648"), Symbol("#foo7648_nomethods"), Symbol("#include"), + Symbol("#foo7648"), Symbol("#foo7648_nomethods"), :TestModSub9475, :d7648, :eval, :f7648, :include]) imported = Set(Symbol[:convert, :curmod_name, :curmod]) usings_from_Test = Set(Symbol[ @@ -265,7 +265,7 @@ let defaultset = Set((:A,)) imported = Set((:M2,)) usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module itself usings = Set((:A, :f, :C, :y, :M1, :m1_x)) ∪ usings_from_Base - allset = Set((:A, :B, :C, :eval, :include, Symbol("#eval"), Symbol("#include"))) + allset = Set((:A, :B, :C, :eval, :include)) @test Set(names(TestMod54609.A)) == defaultset @test Set(names(TestMod54609.A, imported=true)) == defaultset ∪ imported @test Set(names(TestMod54609.A, usings=true)) == defaultset ∪ usings From be0ce9dbf0597c1ff50fc73f3d197a19708c4cd3 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 23 Oct 2024 01:14:02 -0400 Subject: [PATCH 45/47] Don't try to allocate new binding partitions from static show (#56298) In particular static show is used inside the GC for profiling, which showed up as a segfault on CI, e.g. in https://buildkite.com/julialang/julia-master/builds/41407#0192b628-47f3-49f9-a081-cd2708eb6121. GC check didn't catch it because that file is explicitly exempt: https://github.com/JuliaLang/julia/blob/master/src/Makefile#L504 --- src/julia.h | 1 + src/module.c | 21 +++++++++++++++++++++ src/rtutils.c | 2 +- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/julia.h b/src/julia.h index dd79dbb82c28d..1d36dba519700 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1848,6 +1848,7 @@ JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len); JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void); JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT); JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved_and_const(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name); JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module); JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world, jl_code_instance_t **cache); diff --git a/src/module.c b/src/module.c index f1098e22ff522..9b4d26cc7b000 100644 --- a/src/module.c +++ b/src/module.c @@ -48,6 +48,7 @@ jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) return bpart; jl_binding_partition_t *new_bpart = new_binding_partition(); jl_atomic_store_relaxed(&new_bpart->next, bpart); + jl_gc_wb_fresh(new_bpart, bpart); if (bpart) new_bpart->min_world = jl_atomic_load_relaxed(&bpart->max_world) + 1; jl_atomic_store_relaxed(&new_bpart->max_world, max_world); @@ -350,6 +351,26 @@ JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b) return decode_restriction_value(pku); } +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved_and_const(jl_binding_t *b) +{ + // Unlike jl_get_binding_value_if_const this doesn't try to allocate new binding partitions if they + // don't already exist, making this JL_NOTSAFEPOINT. + if (!b) + return NULL; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions); + if (!bpart) + return NULL; + size_t max_world = jl_atomic_load_relaxed(&bpart->max_world); + if (bpart->min_world > jl_current_task->world_age || jl_current_task->world_age > max_world) + return NULL; + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) + return NULL; + if (!jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return NULL; + return decode_restriction_value(pku); +} + JL_DLLEXPORT jl_value_t *jl_bpart_get_restriction_value(jl_binding_partition_t *bpart) { jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); diff --git a/src/rtutils.c b/src/rtutils.c index 85a9be5e0b1da..faa087dcb077d 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -661,7 +661,7 @@ static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL; if (globname && dv->name->module) { jl_binding_t *b = jl_get_module_binding(dv->name->module, globname, 0); - jl_value_t *bv = jl_get_binding_value_if_const(b); + jl_value_t *bv = jl_get_binding_value_if_resolved_and_const(b); // The `||` makes this function work for both function instances and function types. if (bv && (bv == v || jl_typeof(bv) == v)) return 1; From 5e4fb519b5955b2f9ff9a26bd1dc5454561ecef7 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Wed, 23 Oct 2024 15:26:38 +0530 Subject: [PATCH 46/47] Fix fetching parent in symmetric algebra (#56286) We only need the `parent` of the result if it is a triangular matrix. For other structurally triangular matrices, such as `Diagonal`s, we may use these directly in the `Hermitian` constructor. The operation proceeds as (assuming `H isa Hermitian` with `H.uplo == 'U'`): ```julia function +(H::Hermitian, H::Hermitian) U = uppertriangular(parent(H)) Ures = U + U data = Ures isa UpperTriangular ? parent(Ures) : Ures # this PR Hermitian(data, :U) end ``` This accounts for the fact that `Ures` may not be an `UpperTriangular`, as `uppertriangular` might be specialized by the parent. In such cases we should not extract the parent. Currently, only `Diagonal` specializes `uppertriangular`, so this issue only exists for a `Hermitian` or a `Symmetric` that wraps a `Diagonal`. Fixes https://github.com/JuliaLang/julia/issues/56283 --- stdlib/LinearAlgebra/src/symmetric.jl | 4 +++- stdlib/LinearAlgebra/test/symmetric.jl | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index e17eb80d25453..265995d9e7806 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -307,7 +307,9 @@ function applytri(f, A::HermOrSym, B::HermOrSym) f(uppertriangular(_conjugation(A)(A.data)), uppertriangular(B.data)) end end -parentof_applytri(f, args...) = applytri(parent ∘ f, args...) +_parent_tri(U::UpperOrLowerTriangular) = parent(U) +_parent_tri(U) = U +parentof_applytri(f, args...) = _parent_tri(applytri(f, args...)) isdiag(A::HermOrSym) = applytri(isdiag, A) diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl index 7a51ab9d454af..3aef23617b942 100644 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ b/stdlib/LinearAlgebra/test/symmetric.jl @@ -1160,4 +1160,11 @@ end @test symT-s == Array(symT) - Array(s) end +@testset "issue #56283" begin + a = 1.0 + D = Diagonal(randn(10)) + H = Hermitian(D*D') + @test a*H == H +end + end # module TestSymmetric From 133051f20f5995d4128f9c7973efff62ed25e919 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Wed, 23 Oct 2024 07:59:06 -0400 Subject: [PATCH 47/47] REPL: fix closing quote on completing files in a ~ path (#56253) --- stdlib/REPL/src/REPLCompletions.jl | 18 +++++++----------- stdlib/REPL/test/replcompletions.jl | 27 ++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 3188a6ca42a12..d230b7b5fd232 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -935,17 +935,11 @@ function get_import_mode(s::String) return nothing end -function close_path_completion(dir, paths, str, pos) - length(paths) == 1 || return false # Only close if there's a single choice... - path = (paths[1]::PathCompletion).path +function close_path_completion(dir, path, str, pos) path = unescape_string(replace(path, "\\\$"=>"\$")) path = joinpath(dir, path) # ...except if it's a directory... - try - isdir(path) - catch e - e isa Base.IOError || rethrow() # `path` cannot be determined to be a file - end && return false + Base.isaccessibledir(path) && return false # ...and except if there's already a " at the cursor. return lastindex(str) <= pos || str[nextind(str, pos)] != '"' end @@ -1358,10 +1352,12 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif if !isnothing(path) paths, dir, success = complete_path(path::String, string_escape=true) - if close_path_completion(dir, paths, path, pos) - p = (paths[1]::PathCompletion).path * "\"" + if length(paths) == 1 + p = (paths[1]::PathCompletion).path hint && was_expanded && (p = contractuser(p)) - paths[1] = PathCompletion(p) + if close_path_completion(dir, p, path, pos) + paths[1] = PathCompletion(p * "\"") + end end if success && !isempty(dir) diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index cfb9a6137a287..4fe32f47bc80c 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -1236,7 +1236,7 @@ let current_dir, forbidden e isa Base.IOError && occursin("ELOOP", e.msg) end c, r = test_complete("\"$(escape_string(path))/selfsym") - @test c == ["selfsymlink"] + @test c == ["selfsymlink\""] end end @@ -1357,6 +1357,31 @@ let (c, r, res) = test_complete("\"~/julia") c, r, res = test_complete("\"foo~bar") @test !res end +if !Sys.iswindows() + # create a dir and file temporarily in the home directory + path = mkpath(joinpath(homedir(), "Zx6Wa0GkC0")) + touch(joinpath(path, "my_file")) + try + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC") + @test res + @test c == String["Zx6Wa0GkC0/"] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0") + @test res + @test c == String[homedir() * "/Zx6Wa0GkC0"] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_") + @test res + @test c == String["my_file\""] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_file") + @test res + @test c == String[homedir() * "/Zx6Wa0GkC0/my_file"] + end + finally + rm(path, recursive=true) + end +end # Test the completion returns nothing when the folder do not exist let (c, r) = test_complete("cd(\"folder_do_not_exist_77/file")