From add4fe5bd2f28ded045a36e98dcecb0e8c1005a0 Mon Sep 17 00:00:00 2001 From: Frej Drejhammar Date: Wed, 19 Jul 2023 12:19:43 +0200 Subject: [PATCH] compiler: Prune dead variables during alias analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variables which die in a basic block cannot influence the alias status of variables in successor blocks. By pruning dead variables, which are not part of a parent-child derivation relationship of live variables, the size of the active sharing state is reduced. The reduction in size speeds up subsequent `aa_merge_ss/3` operations. Combined with improved kill-set calculation (85454714956f54679ae75ef49d10d7992e31b9e2) and an improved data structure for describing the alias status of variables (c389665cc991fb8a9d123fe34dc95c12bdf1263f), this patch provides a substantial reduction of the time required for alias analysis. For the set of modules compiled by `scripts/diffable` the time spent in alias analysis is reduced by approximately 55%. For the example in Issue #7432 [1], provided by José Valim, which has a large number of variables, the reduction is even more dramatic. The time spent in the alias analysis pass is reduced by 97%. [1] https://github.com/erlang/otp/issues/7432 Closes: #7432 --- lib/compiler/src/beam_ssa_alias.erl | 110 +++++++++++++++++++++------- 1 file changed, 83 insertions(+), 27 deletions(-) diff --git a/lib/compiler/src/beam_ssa_alias.erl b/lib/compiler/src/beam_ssa_alias.erl index 560d5f9ecd7d..2d41a9178953 100644 --- a/lib/compiler/src/beam_ssa_alias.erl +++ b/lib/compiler/src/beam_ssa_alias.erl @@ -377,7 +377,7 @@ aa_fixpoint([], Order, _OldAliasMap, _OldCallArgs, aa_fun(F, #opt_st{ssa=Linear0,args=Args}, AAS0=#aas{alias_map=AliasMap0,call_args=CallArgs0, - func_db=FuncDb,repeats=Repeats0}) -> + func_db=FuncDb,kills=KillsMap,repeats=Repeats0}) -> %% Initially assume all formal parameters are unique for a %% non-exported function, if we have call argument info in the %% AAS, we use it. For an exported function, all arguments are @@ -387,7 +387,9 @@ aa_fun(F, #opt_st{ssa=Linear0,args=Args}, aa_new_ssa_var(Var, Status, Acc) end, #{}, ArgsStatus), ?DP("Args: ~p~n", [ArgsStatus]), - {SS,#aas{call_args=CallArgs}=AAS} = aa_blocks(Linear0, #{0=>SS0}, AAS0), + #{F:=Kills} = KillsMap, + {SS,#aas{call_args=CallArgs}=AAS} = + aa_blocks(Linear0, Kills, #{0=>SS0}, AAS0), ?DP("SS:~n~p~n~n", [SS]), AliasMap = AliasMap0#{ F => SS }, @@ -405,20 +407,23 @@ aa_fun(F, #opt_st{ssa=Linear0,args=Args}, AAS#aas{alias_map=AliasMap,repeats=Repeats}. %% Main entry point for the alias analysis -aa_blocks([{?EXCEPTION_BLOCK,_}|Bs], Lbl2SS, AAS) -> +aa_blocks([{?EXCEPTION_BLOCK,_}|Bs], Kills, Lbl2SS, AAS) -> %% Nothing happening in the exception block can propagate to the %% other block. - aa_blocks(Bs, Lbl2SS, AAS); -aa_blocks([{L,#b_blk{is=Is0,last=T0}}|Bs0], Lbl2SS0, AAS0) -> + aa_blocks(Bs, Kills, Lbl2SS, AAS); +aa_blocks([{L,#b_blk{is=Is0,last=T}}|Bs0], Kills, Lbl2SS0, AAS0) -> #{L:=SS0} = Lbl2SS0, - {SS1,AAS1} = aa_is(Is0, L, SS0, AAS0), - Lbl2SS1 = aa_terminator(T0, SS1, L, Lbl2SS0), - aa_blocks(Bs0, Lbl2SS1, AAS1); -aa_blocks([], Lbl2SS, AAS) -> + {FullSS,AAS1} = aa_is(Is0, SS0, AAS0), + #{{live_outs,L}:=LiveOut} = Kills, + {Lbl2SS1,Successors} = aa_terminator(T, FullSS, Lbl2SS0), + PrunedSS = aa_prune_ss(FullSS, LiveOut), + Lbl2SS2 = aa_add_block_entry_ss(Successors, PrunedSS, Lbl2SS1), + Lbl2SS = aa_set_block_exit_ss(L, FullSS, Lbl2SS2), + aa_blocks(Bs0, Kills, Lbl2SS, AAS1); +aa_blocks([], _Kills, Lbl2SS, AAS) -> {Lbl2SS,AAS}. -aa_is([I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], - ThisBlock, SS0, AAS0) -> +aa_is([I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, AAS0) -> SS1 = aa_new_ssa_var(Dst, unique, SS0), {SS, AAS} = case Op of @@ -540,18 +545,15 @@ aa_is([I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], _ -> exit({unknown_instruction, I}) end, - aa_is(Is, ThisBlock, SS, AAS); -aa_is([], _, SS, AAS) -> + aa_is(Is, SS, AAS); +aa_is([], SS, AAS) -> {SS, AAS}. -aa_terminator(#b_br{succ=S,fail=S}, - SS, ThisBlock, Lbl2SS) -> - aa_set_block_exit_ss(ThisBlock, SS, aa_add_block_entry_ss([S], SS, Lbl2SS)); -aa_terminator(#b_br{succ=S,fail=F}, - SS, ThisBlock, Lbl2SS) -> - aa_set_block_exit_ss(ThisBlock, SS, - aa_add_block_entry_ss([S,F], SS, Lbl2SS)); -aa_terminator(#b_ret{arg=Arg,anno=Anno0}, SS, ThisBlock, Lbl2SS0) -> +aa_terminator(#b_br{succ=S,fail=S}, _SS, Lbl2SS) -> + {Lbl2SS,[S]}; +aa_terminator(#b_br{succ=S,fail=F}, _SS, Lbl2SS) -> + {Lbl2SS,[S,F]}; +aa_terminator(#b_ret{arg=Arg,anno=Anno0}, SS, Lbl2SS0) -> Type = maps:get(result_type, Anno0, any), Status0 = aa_get_status(Arg, SS), ?DP("Returned ~p:~p:~p~n", [Arg, Status0, Type]), @@ -565,11 +567,9 @@ aa_terminator(#b_ret{arg=Arg,anno=Anno0}, SS, ThisBlock, Lbl2SS0) -> Type2Status = Type2Status0#{ Type => Status }, ?DP("New status map: ~p~n", [Type2Status]), Lbl2SS = Lbl2SS0#{ returns => Type2Status}, - aa_set_block_exit_ss(ThisBlock, SS, Lbl2SS); -aa_terminator(#b_switch{fail=F,list=Ls}, - SS, ThisBlock, Lbl2SS0) -> - Lbl2SS = aa_add_block_entry_ss([F|[L || {_,L} <- Ls]], SS, Lbl2SS0), - aa_set_block_exit_ss(ThisBlock, SS, Lbl2SS). + {Lbl2SS, []}; +aa_terminator(#b_switch{fail=F,list=Ls}, _SS, Lbl2SS) -> + {Lbl2SS,[F|[L || {_,L} <- Ls]]}. %% Store the updated SS for the point where execution leaves the %% block. @@ -916,6 +916,63 @@ aa_derive_from(#b_var{}=Dst, #b_var{}=Parent, State) -> ?aa_assert_ss(State#{Dst=>ChildVas,Parent=>ParentVas}) end. +aa_prune_ss(SS, Live) -> + aa_prune_ss(SS, sets:to_list(Live), Live, #{}). +aa_prune_ss(SS, [V|Wanted], Live, Pruned) -> + case is_map_key(V, Pruned) of + false -> + %% This variable has to be kept, copy it, add it to the + %% set of live nodes and add the parents to the work list. + #{V:=#vas{parents=Ps}=Vas} = SS, + aa_prune_ss(SS, Ps++Wanted, + sets:add_element(V, Live), + Pruned#{V=>Vas}); + true -> + %% This variable is alread added. + aa_prune_ss(SS, Wanted, Live, Pruned) + end; +aa_prune_ss(_SS, [], Live, Pruned) -> + %% Now strip all references to variables not in the live set. + PruneRefs = fun(#vas{parents=Ps0,child=Child0,extracted=Es0, + tuple_elems=Ts0,pair_elems=Pes0}=Vas) -> + Ps = [P || P <- Ps0, sets:is_element(P, Live)], + Child = case sets:is_element(Child0, Live) of + true -> + Child0; + false -> + none + end, + Es = [E || E <- Es0, sets:is_element(E, Live)], + Ts = [E + || {_,Var}=E <- Ts0, sets:is_element(Var, Live)], + Pes = case Pes0 of + {_,X}=P -> + case sets:is_element(X, Live) of + true -> + P; + _ -> + none + end; + {both,X,Y}=P -> + case {sets:is_element(X, Live), + sets:is_element(Y, Live)} of + {true,true} -> + P; + {true,false} -> + {hd,X}; + {false,true} -> + {tl,Y}; + _ -> + none + end; + none -> + none + end, + Vas#vas{parents=Ps,child=Child,extracted=Es, + tuple_elems=Ts,pair_elems=Pes} + end, + #{V=>PruneRefs(Vas) || V:=Vas <- Pruned}. + aa_update_annotations(Funs, #aas{alias_map=AliasMap0,st_map=StMap0}=AAS) -> foldl(fun(F, {StMapAcc,AliasMapAcc}) -> #{F:=Lbl2SS0} = AliasMapAcc, @@ -1368,7 +1425,6 @@ aa_breadth_first([], [], _Seen, _FuncDb) -> []; aa_breadth_first([], Next, Seen, FuncDb) -> aa_breadth_first(Next, [], Seen, FuncDb). - -ifdef(EXTRA_ASSERTS). -spec aa_assert_ss(sharing_state()) -> sharing_state().