Skip to content

Commit

Permalink
compiler: Add support debug information in BEAM files
Browse files Browse the repository at this point in the history
  • Loading branch information
bjorng committed Sep 3, 2024
1 parent 32d4471 commit 9892217
Show file tree
Hide file tree
Showing 17 changed files with 1,205 additions and 87 deletions.
142 changes: 125 additions & 17 deletions lib/compiler/src/beam_asm.erl
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,14 @@

-export_type([fail/0,label/0,src/0,module_code/0,function_name/0]).

-import(lists, [append/1,duplicate/2,map/2,member/2,keymember/3,splitwith/2]).
-import(lists, [append/1,duplicate/2,keymember/3,last/1,map/2,
member/2,splitwith/2]).

-include("beam_opcodes.hrl").
-include("beam_asm.hrl").

-define(BEAM_DEBUG_INFO_VERSION, 0).

%% Common types for describing operands for BEAM instructions.
-type src() :: beam_reg() |
{'literal',term()} |
Expand Down Expand Up @@ -60,23 +63,24 @@
-define(BEAMFILE_EXECUTABLE_LINE, 1).
-define(BEAMFILE_FORCE_LINE_COUNTERS, 2).

-spec module(module_code(), [{binary(), binary()}], [{atom(),term()}], [compile:option()]) ->
{'ok',binary()}.

module(Code, ExtraChunks, CompileInfo, CompilerOpts) ->
{ok,assemble(Code, ExtraChunks, CompileInfo, CompilerOpts)}.
-spec module(module_code(), [{binary(), binary()}],
[{atom(),term()}], [compile:option()]) ->
{'ok',binary()}.

assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, ExtraChunks, CompileInfo, CompilerOpts) ->
module(Code0, ExtraChunks, CompileInfo, CompilerOpts) ->
{Mod,Exp0,Attr0,Asm0,NumLabels} = Code0,
{1,Dict0} = beam_dict:atom(Mod, beam_dict:new()),
{0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0),
{0,Dict2} = beam_dict:type(any, Dict1),
Dict3 = reject_unsupported_versions(Dict2),

NumFuncs = length(Asm0),
{Asm,Attr} = on_load(Asm0, Attr0),
Exp = sets:from_list(Exp0),
{Code,Dict} = assemble_1(Asm, Exp, Dict3, []),
build_file(Code, Attr, Dict, NumLabels, NumFuncs,
ExtraChunks, CompileInfo, CompilerOpts).
{Code,Dict} = assemble(Asm, Exp, Dict3, []),
Beam = build_file(Code, Attr, Dict, NumLabels, NumFuncs,
ExtraChunks, CompileInfo, CompilerOpts),
{ok,Beam}.

reject_unsupported_versions(Dict) ->
%% Emit an instruction that was added in our lowest supported
Expand Down Expand Up @@ -106,16 +110,16 @@ insert_on_load_instruction(Is0, Entry) ->
end, Is0),
Bef ++ [El,on_load|Is].

assemble_1([{function,Name,Arity,Entry,Asm}|T], Exp, Dict0, Acc) ->
assemble([{function,Name,Arity,Entry,Asm}|T], Exp, Dict0, Acc) ->
Dict1 = case sets:is_element({Name,Arity}, Exp) of
true ->
beam_dict:export(Name, Arity, Entry, Dict0);
false ->
beam_dict:local(Name, Arity, Entry, Dict0)
end,
{Code, Dict2} = assemble_function(Asm, Acc, Dict1),
assemble_1(T, Exp, Dict2, Code);
assemble_1([], _Exp, Dict0, Acc) ->
assemble(T, Exp, Dict2, Code);
assemble([], _Exp, Dict0, Acc) ->
{IntCodeEnd,Dict1} = make_op(int_code_end, Dict0),
{list_to_binary(lists:reverse(Acc, [IntCodeEnd])),Dict1}.

Expand All @@ -125,17 +129,23 @@ assemble_function([H|T], Acc, Dict0) ->
assemble_function([], Code, Dict) ->
{Code, Dict}.

build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks0, CompileInfo, CompilerOpts) ->
build_file(Code, Attr, Dict0, NumLabels, NumFuncs, ExtraChunks0,
CompileInfo, CompilerOpts) ->
%% Create the code chunk.

CodeChunk = chunk(<<"Code">>,
<<16:32,
(beam_opcodes:format_number()):32,
(beam_dict:highest_opcode(Dict)):32,
(beam_dict:highest_opcode(Dict0)):32,
NumLabels:32,
NumFuncs:32>>,
Code),

%% Build the BEAM debug information chunk. It is important
%% to build it early, because it will add entries to the
%% atom and literal tables.
{ExtraChunks1,Dict} = build_beam_debug_info(ExtraChunks0, CompilerOpts, Dict0),

%% Create the atom table chunk.
{NumAtoms, AtomTab} = beam_dict:atom_table(Dict),
AtomChunk = chunk(<<"AtU8">>, <<NumAtoms:32>>, AtomTab),
Expand Down Expand Up @@ -194,13 +204,14 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks0, CompileInfo, Com
TypeTab),

%% Create the meta chunk
Meta = proplists:get_value(<<"Meta">>, ExtraChunks0, empty),
Meta = proplists:get_value(<<"Meta">>, ExtraChunks1, empty),
MetaChunk = case Meta of
empty -> [];
Meta -> chunk(<<"Meta">>, Meta)
end,

%% Remove Meta chunk from ExtraChunks since it is essential
ExtraChunks = ExtraChunks0 -- [{<<"Meta">>, Meta}],
ExtraChunks = ExtraChunks1 -- [{<<"Meta">>, Meta}],

%% Create the attributes and compile info chunks.

Expand Down Expand Up @@ -352,6 +363,99 @@ filter_essentials([<<>>|T]) ->
filter_essentials(T);
filter_essentials([]) -> [].

%%%
%%% Build the BEAM debug information chunk.
%%%

%% build_beam_debug_info(ExtraChunks, CompilerOpts, Dict0) ->
%%         {ExtraChunks,Dict}
%%  Add the BEAM debug information chunk to the extra chunks, but
%%  only when the `beam_debug_info` option is present in the compiler
%%  options. Without the option, the extra chunks and the dictionary
%%  are returned unchanged.
build_beam_debug_info(ExtraChunks, CompilerOpts, Dict) ->
    Enabled = lists:member(beam_debug_info, CompilerOpts),
    if
        Enabled ->
            build_beam_debug_info_1(ExtraChunks, Dict);
        true ->
            {ExtraChunks,Dict}
    end.

%% build_beam_debug_info_1(ExtraChunks0, Dict0) -> {ExtraChunks,Dict}
%%  Build the "DbgB" chunk from the debug table kept in the
%%  dictionary and put it first in the list of extra chunks.
%%
%%  The chunk begins with three 32-bit fields (the version of the
%%  debug information format, the number of items, and the number of
%%  variables), followed by the encoded items. Encoding the items
%%  produces an updated dictionary, which is returned together with
%%  the extended list of chunks.
build_beam_debug_info_1(ExtraChunks0, Dict0) ->
    %% Visit the table in key order, so that gaps between
    %% consecutive indices can be detected and filled in.
    Iter = maps:iterator(beam_dict:debug_table(Dict0), ordered),
    Items = build_bdi_fill_holes([{Index,Info} || Index := Info <- Iter]),
    VarCounts = [length(Vs) || {_,Vs} <- Items],
    NumVars = lists:sum(VarCounts),
    {Encoded,Dict} = build_bdi(Items, Dict0),
    NumItems = length(Encoded),
    Payload = iolist_to_binary(Encoded),

    %% Assertion: the number of variables must fit in 31 bits.
    0 = NumVars bsr 31,

    Chunk = <<?BEAM_DEBUG_INFO_VERSION:32,
              NumItems:32,
              NumVars:32,
              Payload/binary>>,
    {[{~"DbgB",Chunk}|ExtraChunks0],Dict}.

%% build_bdi_fill_holes([{Index,Item}]) -> [Item]
%%  Strip the indices from a list of debug information items that is
%%  sorted by index. For every index that is missing between two
%%  consecutive entries, an empty item ({none,[]}) is inserted, so
%%  that consecutive positions in the result correspond to
%%  consecutive indices.
%%
%%  An empty list is returned for an empty table. The table is
%%  presumably empty when debug information was requested but no
%%  `executable_line` instruction carrying debug information was
%%  assembled; without the clause for [] that case would crash with
%%  a function_clause error.
build_bdi_fill_holes([]) ->
    [];
build_bdi_fill_holes([{_,Item}]) ->
    [Item];
build_bdi_fill_holes([{I0,Item}|[{I1,_}|_]=T]) ->
    case I0 + 1 of
        I1 ->
            %% The next entry directly follows this one.
            [Item|build_bdi_fill_holes(T)];
        Next ->
            %% There is a gap. Insert an empty item for the next
            %% index and continue; this is repeated until the gap
            %% has been closed.
            NewPair = {Next,{none,[]}},
            [Item|build_bdi_fill_holes([NewPair|T])]
    end.

%% build_bdi([{FrameSize,Vars}], Dict0) -> {[EncodedItem],Dict}
%%  Encode every debug information item, threading the dictionary
%%  through the encoding of each item in order.
%%
%%  The encoding machinery for BEAM instructions is reused for the
%%  debug information. The debug information for one
%%  `executable_line` instruction is translated to:
%%
%%      {call,FrameSize,{list,[VariableName,Where,...]}}
%%
%%  Where:
%%
%%      FrameSize := 'none' | 0..1023
%%      VariableName := binary()
%%      Where := {x,0..1023} | {y,0..1023} | {literal,_} |
%%               {integer,_} | {atom,_} | {float,_} | nil
%%
%%  The `call` instruction is used only because it has two
%%  operands. A frame size of `none` is passed to the encoder
%%  as `nil`.
%%
%%  The following example:
%%
%%      {executable_line,[...],1,
%%        {4, [{'Args',[{y,3}]},
%%             {'Line',[{y,2}]},
%%             {'Live',[{x,0},{y,1}]}]}}.
%%
%%  will be translated to the following instruction:
%%
%%      {call,4,{list,[{literal,<<"Args">>},{y,3},
%%                     {literal,<<"Line">>},{y,2},
%%                     {literal,<<"Live">>},{y,1}]}}
%%
%%  Only one register is given for each variable; it is always the
%%  last register listed. Variables with an empty list of registers
%%  are left out.
build_bdi(Items, Dict0) ->
    Encode =
        fun({FrameSize0,Vars0}, Dict) ->
                FrameSize = if
                                FrameSize0 =:= none -> nil;
                                true -> FrameSize0
                            end,
                Pairs = [[{literal,atom_to_binary(Name)},lists:last(Regs)] ||
                            {Name,[_|_]=Regs} <- Vars0],
                Operands = lists:append(Pairs),
                make_op({call,FrameSize,{list,Operands}}, Dict)
        end,
    lists:mapfoldl(Encode, Dict0, Items).

%%%
%%% Functions for assembling BEAM instructions.
%%%

bif_type(fnegate, 1) -> {op,fnegate};
bif_type(fadd, 2) -> {op,fadd};
bif_type(fsub, 2) -> {op,fsub};
Expand All @@ -368,6 +472,10 @@ make_op({line=Op,Location}, Dict0) ->
make_op({executable_line=Op,Location,Index}, Dict0) ->
{LocationIndex,Dict} = beam_dict:line(Location, Dict0, Op),
encode_op(executable_line, [LocationIndex,Index], Dict);
make_op({executable_line=Op,Location,Index,DebugInfo}, Dict0) ->
{LocationIndex,Dict1} = beam_dict:line(Location, Dict0, Op),
Dict = beam_dict:debug_info(Index, DebugInfo, Dict1),
encode_op(executable_line, [LocationIndex,Index], Dict);
make_op({bif, Bif, {f,_}, [], Dest}, Dict) ->
%% BIFs without arguments cannot fail.
encode_op(bif0, [{extfunc, erlang, Bif, 0}, Dest], Dict);
Expand Down
7 changes: 6 additions & 1 deletion lib/compiler/src/beam_block.erl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
-include("beam_asm.hrl").

-export([module/2]).
-import(lists, [keysort/2,member/2,reverse/1,reverse/2,
-import(lists, [flatmap/2,keysort/2,member/2,reverse/1,reverse/2,
splitwith/2,usort/1]).

-spec module(beam_utils:module_code(), [compile:option()]) ->
Expand Down Expand Up @@ -172,12 +172,17 @@ collect({put_map,{f,0},Op,S,D,R,{list,Puts}}) ->
collect({fmove,S,D}) -> {set,[D],[S],fmove};
collect({fconv,S,D}) -> {set,[D],[S],fconv};
collect({executable_line,_,_}=Line) -> {set,[],[],Line};
collect({executable_line,_,_,_}=Line) -> collect_executable_line(Line);
collect({swap,D1,D2}) ->
Regs = [D1,D2],
{set,Regs,Regs,swap};
collect({make_fun3,F,I,U,D,{list,Ss}}) -> {set,[D],Ss,{make_fun3,F,I,U}};
collect(_) -> error.

%% collect_executable_line(Instruction) -> {set,[],Ss,Instruction}
%%  Wrap an `executable_line` instruction that carries debug
%%  information in a `set` tuple without destination registers.
%%  The sources are all registers listed for the variables in the
%%  debug information, in the order they appear.
collect_executable_line({executable_line,_Loc,_Index,{_,Vars}}=I) ->
    Ss = lists:foldr(fun({_Name,Regs}, Acc) -> Regs ++ Acc end, [], Vars),
    {set,[],Ss,I}.

%% embed_lines([Instruction]) -> [Instruction]
%% Combine blocks that would be split by line/1 instructions.
%% Also move a line instruction before a block into the block,
Expand Down
66 changes: 58 additions & 8 deletions lib/compiler/src/beam_core_to_ssa.erl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
map/2,mapfoldl/3,member/2,
keyfind/3,keysort/2,last/1,
partition/2,reverse/1,reverse/2,
sort/1,sort/2,splitwith/2,
sort/1,sort/2,usort/1,splitwith/2,
zip/2]).
-import(ordsets, [add_element/2,del_element/2,intersection/2,
subtract/2,union/2,union/1]).
Expand All @@ -94,7 +94,7 @@
%% matching. (Construction of those term types is translated directly
%% to SSA instructions.)

-record(cg_tuple, {es}).
-record(cg_tuple, {es,keep=[]}).
-record(cg_map, {var=#b_literal{val=#{}},op,es}).
-record(cg_map_pair, {key,val}).
-record(cg_cons, {hd,tl}).
Expand Down Expand Up @@ -149,7 +149,8 @@ get_anno(#cg_select{anno=Anno}) -> Anno.
funs=[], %Fun functions
free=#{}, %Free variables
ws=[] :: [warning()], %Warnings.
no_min_max_bifs=false :: boolean()
no_min_max_bifs=false :: boolean(),
beam_debug_info=false :: boolean()
}).

-spec module(cerl:c_module(), [compile:option()]) ->
Expand All @@ -159,8 +160,10 @@ module(#c_module{name=#c_literal{val=Mod},exports=Es,attrs=As,defs=Fs}, Options)
Kas = attributes(As),
Kes = map(fun (#c_var{name={_,_}=Fname}) -> Fname end, Es),
NoMinMaxBifs = proplists:get_bool(no_min_max_bifs, Options),
DebugInfo = proplists:get_bool(beam_debug_info, Options),
St0 = #kern{module=Mod,
no_min_max_bifs=NoMinMaxBifs},
no_min_max_bifs=NoMinMaxBifs,
beam_debug_info=DebugInfo},
{Kfs,St} = mapfoldl(fun function/2, St0, Fs),
Body = Kfs ++ St#kern.funs,
Code = #b_module{name=Mod,exports=Kes,attributes=Kas,body=Body},
Expand Down Expand Up @@ -369,6 +372,14 @@ expr(#c_call{anno=A,module=M0,name=F0,args=Cargs}, Sub, St0) ->
args=[M0,F0,cerl:make_list(Cargs)]},
expr(Call, Sub, St)
end;
expr(#c_primop{anno=Anno0,name=#c_literal{val=executable_line=Op},
               args=Cargs}, Sub, #kern{beam_debug_info=true}=St0) ->
    %% This clause only applies when BEAM debug information is being
    %% generated. The second element of the substitution pair is
    %% stored in the annotation of the resulting instruction under
    %% the key `alias`.
    {Args,Pre,St} = atomic_list(Cargs, Sub, St0),
    Set0 = primop(Op, Anno0, Args),
    #b_set{anno=Anno1} = Set0,
    {_,Alias} = Sub,
    Set = Set0#b_set{anno=Anno1#{alias => Alias}},
    {Set,Pre,St};
expr(#c_primop{anno=A,name=#c_literal{val=match_fail},args=[Arg]}, Sub, St) ->
translate_match_fail(Arg, Sub, A, St);
expr(#c_primop{anno=A,name=#c_literal{val=Op},args=Cargs}, Sub, St0) ->
Expand Down Expand Up @@ -1666,9 +1677,24 @@ get_match(#cg_bin_seg{}=Seg, St0) ->
get_match(#cg_bin_int{}=BinInt, St0) ->
{N,St1} = new_var(St0),
{BinInt#cg_bin_int{next=N},[N],St1};
get_match(#cg_tuple{es=Es}, St0) ->
get_match(#cg_tuple{es=Es}, #kern{beam_debug_info=DebugInfo}=St0) ->
{Mes,St1} = new_vars(length(Es), St0),
{#cg_tuple{es=Mes},Mes,St1};
Keep =
case DebugInfo of
true ->
%% Force extraction of all variables mentioned in the
%% original source to give them a chance to appear in
%% the debug information. This is not a guarantee that
%% they will appear, since they can be killed before
%% reaching an `executable_line` instruction.
Keep0 = [New ||
{#b_var{name=Old},#b_var{name=New}} <- lists:zip(Es, Mes),
beam_ssa_codegen:is_original_variable(Old)],
ordsets:from_list(Keep0);
false ->
[]
end,
{#cg_tuple{es=Mes,keep=Keep},Mes,St1};
get_match(#cg_map{op=exact,es=Es0}, St0) ->
{Mes,St1} = new_vars(length(Es0), St0),
{Es,_} = mapfoldl(fun(#cg_map_pair{}=Pair, [V|Vs]) ->
Expand Down Expand Up @@ -2252,12 +2278,13 @@ umatch_list(Ms0, Br, St) ->
{[M1|Ms1],union(Mu, Us),Stb}
end, {[],[],St}, Ms0).

pat_mark_unused(#cg_tuple{es=Es0}=P, Used0, Ps) ->
pat_mark_unused(#cg_tuple{es=Es0,keep=Keep}=P, Used0, Ps) ->
%% Not extracting unused tuple elements is an optimization for
%% compile time and memory use during compilation. It is probably
%% worthwhile because it is common to extract only a few elements
%% from a huge record.
Used = intersection(Used0, Ps),
Used1 = ordsets:union(Used0, Keep),
Used = intersection(Used1, Ps),
Es = [case member(V, Used) of
true -> Var;
false -> #b_literal{val=unused}
Expand Down Expand Up @@ -2374,6 +2401,29 @@ cg(#b_set{op=copy,dst=#b_var{name=Dst},args=[Arg0]}, St0) ->
Arg = ssa_arg(Arg0, St0),
St = set_ssa_var(Dst, Arg, St0),
{[],St};
cg(#b_set{anno=Anno0,op=executable_line,args=Args0}=Set0, St) ->
    %% Record in the annotation what is known about the variables in
    %% St#cg.vars:
    %%
    %%   * Every entry whose value is a literal is stored as a
    %%     {Value,Name} pair in the list under the key `literals`.
    %%
    %%   * Every entry whose value is another variable is merged
    %%     into the map under the key `alias`, which maps the name
    %%     of that variable to a sorted list (without duplicates)
    %%     of the names bound to it. If there are no such entries,
    %%     the `alias` key is left untouched.
    Args = ssa_args(Args0, St),
    Vars = St#cg.vars,
    Literals = [{Val,From} || From := #b_literal{val=Val} <- Vars],
    Anno1 = Anno0#{literals => Literals},
    Anno = case [{To,From} || From := #b_var{name=To} <- Vars] of
               [] ->
                   Anno1;
               [_|_]=NewAlias ->
                   Merge = fun({To,From}, A) ->
                                   maps:update_with(
                                     To,
                                     fun(Froms) -> usort([From|Froms]) end,
                                     [From],
                                     A)
                           end,
                   Alias = foldl(Merge, maps:get(alias, Anno0, #{}), NewAlias),
                   Anno1#{alias => Alias}
           end,
    {[Set0#b_set{anno=Anno,args=Args}],St};
cg(#b_set{args=Args0}=Set0, St) ->
Args = ssa_args(Args0, St),
Set = Set0#b_set{args=Args},
Expand Down
Loading

0 comments on commit 9892217

Please sign in to comment.