From cca94f65d964b4912b69ec44348f5aa1fdb8f324 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 10:31:38 +0200 Subject: [PATCH 01/46] JIT: Optimize calls to TERM_COMPARE Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 51 +++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 02c2b3a6e..01e3bacbd 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -528,16 +528,26 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), {MSt2, Arg2, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0), ?TRACE("OP_IS_EQ_EXACT ~p, ~p, ~p\n", [Label, Arg1, Arg2]), - {MSt3, ResultReg} = MMod:call_primitive(MSt2, ?PRIM_TERM_COMPARE, [ - ctx, jit_state, {free, Arg1}, {free, Arg2}, ?TERM_COMPARE_EXACT - ]), - MSt4 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt3), - MSt5 = cond_jump_to_label( - {{free, ResultReg}, '&', ?TERM_LESS_THAN + ?TERM_GREATER_THAN, '!=', 0}, - Label, - MMod, - MSt4 - ), + % If Arg2 is an immediate, we don't need to call term_compare + MSt5 = + if + is_integer(Arg2) -> + {MSt3, Arg1Reg} = MMod:move_to_native_register(MSt2, Arg1), + cond_jump_to_label({{free, Arg1Reg}, '!=', Arg2}, Label, MMod, MSt3); + true -> + {MSt3, ResultReg} = MMod:call_primitive(MSt2, ?PRIM_TERM_COMPARE, [ + ctx, jit_state, {free, Arg1}, {free, Arg2}, ?TERM_COMPARE_EXACT + ]), + MSt4 = handle_error_if( + {'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt3 + ), + cond_jump_to_label( + {{free, ResultReg}, '&', ?TERM_LESS_THAN + ?TERM_GREATER_THAN, '!=', 0}, + Label, + MMod, + MSt4 + ) + end, ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 44 @@ -547,11 +557,22 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), {MSt2, Arg2, Rest3} = decode_compact_term(Rest2, MMod, 
MSt1, State0), ?TRACE("OP_IS_NOT_EQ_EXACT ~p, ~p, ~p\n", [Label, Arg1, Arg2]), - {MSt3, ResultReg} = MMod:call_primitive(MSt2, ?PRIM_TERM_COMPARE, [ - ctx, jit_state, {free, Arg1}, {free, Arg2}, ?TERM_COMPARE_EXACT - ]), - MSt4 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt3), - MSt5 = cond_jump_to_label({'(int)', {free, ResultReg}, '==', ?TERM_EQUALS}, Label, MMod, MSt4), + MSt5 = + if + is_integer(Arg2) -> + {MSt3, Arg1Reg} = MMod:move_to_native_register(MSt2, Arg1), + cond_jump_to_label({{free, Arg1Reg}, '==', Arg2}, Label, MMod, MSt3); + true -> + {MSt3, ResultReg} = MMod:call_primitive(MSt2, ?PRIM_TERM_COMPARE, [ + ctx, jit_state, {free, Arg1}, {free, Arg2}, ?TERM_COMPARE_EXACT + ]), + MSt4 = handle_error_if( + {'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt3 + ), + cond_jump_to_label( + {'(int)', {free, ResultReg}, '==', ?TERM_EQUALS}, Label, MMod, MSt4 + ) + end, ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 45 From 1075c1a8d8e7d660d60adb67972fa3464a1f5149 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 17 Sep 2025 21:12:25 +0200 Subject: [PATCH 02/46] JIT: optimize calls to term_to_int using types Signed-off-by: Paul Guyot --- libs/estdlib/src/code_server.erl | 17 +- libs/jit/src/jit.erl | 43 +++- libs/jit/src/jit_precompile.erl | 106 +++++++++- src/libAtomVM/iff.c | 3 + src/libAtomVM/iff.h | 6 +- src/libAtomVM/module.c | 202 +++++++++++++++++++ src/libAtomVM/module.h | 12 ++ src/libAtomVM/nifs.c | 20 ++ src/libAtomVM/nifs.gperf | 1 + tests/erlang_tests/CMakeLists.txt | 5 +- tests/erlang_tests/test_code_server_nifs.erl | 129 ++++++++++++ tests/libs/jit/jit_tests.erl | 62 +++++- tests/test.c | 2 + tools/packbeam/packbeam.c | 4 + 14 files changed, 591 insertions(+), 21 deletions(-) create mode 100644 tests/erlang_tests/test_code_server_nifs.erl diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index 11266a1ca..427d5fa52 100644 --- 
a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -38,6 +38,7 @@ code_chunk/1, atom_resolver/2, literal_resolver/2, + type_resolver/2, set_native_code/3 ]). @@ -126,6 +127,14 @@ atom_resolver(_Module, _Index) -> literal_resolver(_Module, _Index) -> erlang:nif_error(undefined). +%% @doc Get a type from its index +%% @return The type information +%% @param Module module get a type from +%% @param Index type index in the module +-spec type_resolver(Module :: module(), Index :: non_neg_integer()) -> any(). +type_resolver(_Module, _Index) -> + erlang:nif_error(undefined). + %% @doc Associate a native code stream with a module %% @return ok %% @param Module module to set the native code of @@ -154,10 +163,16 @@ load(Module) -> LiteralResolver = fun(Index) -> code_server:literal_resolver(Module, Index) end, + TypeResolver = fun(Index) -> code_server:type_resolver(Module, Index) end, Stream0 = jit:stream(jit_mmap_size(byte_size(Code))), {BackendModule, BackendState0} = jit:backend(Stream0), {LabelsCount, BackendState1} = jit:compile( - Code, AtomResolver, LiteralResolver, BackendModule, BackendState0 + Code, + AtomResolver, + LiteralResolver, + TypeResolver, + BackendModule, + BackendState0 ), Stream1 = BackendModule:stream(BackendState1), code_server:set_native_code(Module, LabelsCount, Stream1), diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 01e3bacbd..8dc76772b 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -24,7 +24,7 @@ stream/1, backend/1, beam_chunk_header/3, - compile/5 + compile/6 ]). % NIFs @@ -98,7 +98,8 @@ line_offsets :: [{integer(), integer()}], labels_count :: pos_integer(), atom_resolver :: fun((integer()) -> atom()), - literal_resolver :: fun((integer()) -> any()) + literal_resolver :: fun((integer()) -> any()), + type_resolver :: fun((integer()) -> any()) }). -type stream() :: any(). 
@@ -130,6 +131,7 @@ compile( <<16:32, 0:32, OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, Opcodes/binary>>, AtomResolver, LiteralResolver, + TypeResolver, MMod, MSt0 ) when OpcodeMax =< ?OPCODE_MAX -> @@ -138,7 +140,8 @@ compile( line_offsets = [], labels_count = LabelsCount, atom_resolver = AtomResolver, - literal_resolver = LiteralResolver + literal_resolver = LiteralResolver, + type_resolver = TypeResolver }, {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), MSt3 = second_pass(MMod, MSt2, State1), @@ -147,11 +150,12 @@ compile( <<16:32, 0:32, OpcodeMax:32, _LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>>, _AtomResolver, _LiteralResolver, + _TypeResolver, _MMod, _MSt ) -> error(badarg, [OpcodeMax]); -compile(CodeChunk, _AtomResolver, _LiteralResolver, _MMod, _MSt) -> +compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _MMod, _MSt) -> error(badarg, [CodeChunk]). % 1 @@ -1143,7 +1147,7 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), - {MSt2, ArityTerm, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0), + {MSt2, ArityTerm, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0), ?TRACE("OP_IS_FUNCTION2 ~p,~p,~p\n", [Label, Arg1, ArityTerm]), {MSt3, FuncPtr} = term_is_boxed_with_tag_and_get_ptr(Label, Arg1, ?TERM_BOXED_FUN, MMod, MSt2), {MSt4, Arity} = term_to_int(ArityTerm, Label, MMod, MSt3), @@ -1174,7 +1178,7 @@ first_pass(<>, MMod, MSt0, State0) -> {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), {_Live, Rest3} = decode_literal(Rest2), - {MSt2, Size, Rest4} = decode_compact_term(Rest3, MMod, MSt1, State0), + {MSt2, Size, Rest4} = decode_typed_compact_term(Rest3, MMod, MSt1, State0), {Unit, Rest5} = decode_literal(Rest4), {FlagsValue, Rest6} = decode_literal(Rest5), {MSt3, SrcReg} = MMod:move_to_native_register(MSt2, Src), @@ 
-1213,7 +1217,7 @@ first_pass(<>, MMod, MSt0, State0) -> {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), {_Live, Rest3} = decode_literal(Rest2), - {MSt2, Size, Rest4} = decode_compact_term(Rest3, MMod, MSt1, State0), + {MSt2, Size, Rest4} = decode_typed_compact_term(Rest3, MMod, MSt1, State0), {Unit, Rest5} = decode_literal(Rest4), {FlagsValue, Rest6} = decode_literal(Rest5), {MSt3, SrcReg} = MMod:move_to_native_register(MSt2, Src), @@ -1338,7 +1342,7 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), - {MSt2, Size, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0), + {MSt2, Size, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0), {Unit, Rest4} = decode_literal(Rest3), {_FlagsValue, Rest5} = decode_literal(Rest4), ?TRACE("OP_BS_SKIP_BITS2 ~p, ~p, ~p, ~p, ~p\n", [Fail, Src, Size, Unit, _FlagsValue]), @@ -2071,7 +2075,7 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Src, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), - {MSt2, Pos, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0), + {MSt2, Pos, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt1, State0), ?TRACE("OP_BS_SET_POSITION ~p, ~p\n", [Src, Pos]), {MSt3, MatchStateReg} = MMod:move_to_native_register(MSt2, Src), {MSt4, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt3, {free, MatchStateReg}), @@ -2814,7 +2818,7 @@ first_pass_bs_match_integer( {_Live, Rest1} = decode_literal(Rest0), {Flags, Rest2} = decode_compile_time_literal(Rest1, State0), {MSt1, FlagsValue} = decode_flags_list(Flags, MMod, MSt0), - {MSt2, Size, Rest3} = decode_compact_term(Rest2, MMod, MSt0, State0), + {MSt2, Size, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt0, State0), {Unit, Rest4} = decode_literal(Rest3), ?TRACE("{integer,~p,~p,~p, 
", [Flags, Size, Unit]), {MSt3, SizeReg} = term_to_int(Size, 0, MMod, MSt1), @@ -3164,6 +3168,14 @@ term_to_int(Term, _FailLabel, _MMod, MSt0) when is_integer(Term) -> {MSt0, Term bsr 4}; term_to_int({literal, Val}, _FailLabel, _MMod, MSt0) when is_integer(Val) -> {MSt0, Val}; +% Optimized case: when we have type information showing this is an integer, skip the type check +term_to_int({typed, Term, {t_integer, _Range}}, _FailLabel, MMod, MSt0) -> + {MSt1, Reg} = MMod:move_to_native_register(MSt0, Term), + MSt2 = MMod:shift_right(MSt1, Reg, 4), + {MSt2, Reg}; +term_to_int({typed, Term, _NonIntegerType}, FailLabel, MMod, MSt0) -> + % Type information shows it's not an integer, fall back to generic path + term_to_int(Term, FailLabel, MMod, MSt0); term_to_int(Term, FailLabel, MMod, MSt0) -> {MSt1, Reg} = MMod:move_to_native_register(MSt0, Term), MSt2 = cond_raise_badarg_or_jump_to_fail_label( @@ -3336,6 +3348,17 @@ decode_compact_term(<<_Value:5, ?COMPACT_LITERAL:3, _Rest/binary>> = Binary, _MM decode_compact_term(Other, MMod, MSt, _State) -> decode_dest(Other, MMod, MSt). +% Decode compact term with type information awareness +decode_typed_compact_term(<>, MMod, MSt0, #state{ + type_resolver = TypeResover +}) -> + {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), + {TypeIx, Rest2} = decode_literal(Rest1), + Type = TypeResover(TypeIx), + {MSt1, {typed, Dest, Type}, Rest2}; +decode_typed_compact_term(Other, MMod, MSt, State) -> + decode_compact_term(Other, MMod, MSt, State). + skip_compact_term(<<_:4, ?COMPACT_INTEGER:4, _Rest/binary>> = Bin) -> {_Value, Rest} = decode_value64(Bin), Rest; diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index a50f375b9..f358138e6 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -19,7 +19,7 @@ % -module(jit_precompile). --export([start/0, compile/3]). +-export([start/0, compile/3, atom_resolver/1, type_resolver/1]). -include_lib("jit.hrl"). 
@@ -36,8 +36,7 @@ compile(Target, Dir, Path) -> FilteredChunks = lists:keydelete("Code", 1, FilteredChunks0), {"Code", CodeChunk} = lists:keyfind("Code", 1, InitialChunks), {"AtU8", AtomChunk} = lists:keyfind("AtU8", 1, InitialChunks), - Atoms = parse_atom_chunk(AtomChunk), - AtomResolver = fun(Index) -> lists:nth(Index, Atoms) end, + AtomResolver = atom_resolver(AtomChunk), LiteralsChunk = case lists:keyfind("LitU", 1, InitialChunks) of {"LitU", LiteralsChunk0} -> @@ -52,8 +51,16 @@ compile(Target, Dir, Path) -> <<>> end end, - Literals = parse_literals_chunk(LiteralsChunk), - LiteralResolver = fun(Index) -> lists:nth(Index + 1, Literals) end, + LiteralResolver = literal_resolver(LiteralsChunk), + + TypesChunk = + case lists:keyfind("Type", 1, InitialChunks) of + {"Type", TypesChunk0} -> + TypesChunk0; + false -> + <<>> + end, + TypeResolver = type_resolver(TypesChunk), Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = @@ -64,7 +71,7 @@ compile(Target, Dir, Path) -> Backend = list_to_atom("jit_" ++ Target), Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), {LabelsCount, Stream3} = jit:compile( - CodeChunk, AtomResolver, LiteralResolver, Backend, Stream2 + CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 ), NativeCode = Backend:stream(Stream3), UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}], @@ -78,6 +85,10 @@ compile(Target, Dir, Path) -> io:format("Unimplemented opcode ~p (~s)\n", [Opcode, Path]) end. +atom_resolver(AtomChunk) -> + Atoms = parse_atom_chunk(AtomChunk), + fun(Index) -> lists:nth(Index, Atoms) end. + parse_atom_chunk(<>) -> if AtomCount < 0 -> @@ -100,6 +111,10 @@ parse_atom_chunk_old_format(<>, Acc) -> parse_atom_chunk_old_format(<<>>, Acc) -> lists:reverse(Acc). +literal_resolver(LiteralsChunk) -> + Literals = parse_literals_chunk(LiteralsChunk), + fun(Index) -> lists:nth(Index + 1, Literals) end. 
+ parse_literals_chunk(<>) -> parse_literals_chunk0(TermsCount, Rest, []); parse_literals_chunk(<<>>) -> @@ -110,3 +125,82 @@ parse_literals_chunk0(0, <<>>, Acc) -> parse_literals_chunk0(N, <>, Acc) -> Term = binary_to_term(TermBin), parse_literals_chunk0(N - 1, Rest, [Term | Acc]). + +%% Version (from beam_types.hrl) +-define(BEAM_TYPES_VERSION, 3). + +%% Type chunk constants (from beam_types.erl) +-define(BEAM_TYPE_ATOM, (1 bsl 0)). +-define(BEAM_TYPE_BITSTRING, (1 bsl 1)). +-define(BEAM_TYPE_CONS, (1 bsl 2)). +-define(BEAM_TYPE_FLOAT, (1 bsl 3)). +-define(BEAM_TYPE_FUN, (1 bsl 4)). +-define(BEAM_TYPE_INTEGER, (1 bsl 5)). +-define(BEAM_TYPE_MAP, (1 bsl 6)). +-define(BEAM_TYPE_NIL, (1 bsl 7)). +-define(BEAM_TYPE_PID, (1 bsl 8)). +-define(BEAM_TYPE_PORT, (1 bsl 9)). +-define(BEAM_TYPE_REFERENCE, (1 bsl 10)). +-define(BEAM_TYPE_TUPLE, (1 bsl 11)). + +-define(BEAM_TYPE_HAS_LOWER_BOUND, (1 bsl 12)). +-define(BEAM_TYPE_HAS_UPPER_BOUND, (1 bsl 13)). +-define(BEAM_TYPE_HAS_UNIT, (1 bsl 14)). + +type_resolver(<>) when Version =:= ?BEAM_TYPES_VERSION -> + Types = parse_type_entries(TypeData, []), + fun(Index) -> lists:nth(Index + 1, Types) end; +type_resolver(_) -> + fun(_) -> any end. 
+ +parse_type_entries(<<>>, Acc) -> + lists:reverse(Acc); +parse_type_entries( + <<0:1, HasUnit:1, HasUpperBound:1, HasLowerBound:1, TypeBits:12, Rest0/binary>>, Acc +) -> + {Rest, LowerBound, UpperBound, Unit} = parse_extra( + HasLowerBound, HasUpperBound, HasUnit, Rest0, '-inf', '+inf', 1 + ), + Type = + case TypeBits of + ?BEAM_TYPE_ATOM -> + t_atom; + ?BEAM_TYPE_BITSTRING -> + {t_bs_matchable, Unit}; + ?BEAM_TYPE_CONS -> + t_cons; + ?BEAM_TYPE_FLOAT -> + t_float; + ?BEAM_TYPE_FUN -> + t_fun; + ?BEAM_TYPE_FLOAT bor ?BEAM_TYPE_INTEGER -> + {t_number, {LowerBound, UpperBound}}; + ?BEAM_TYPE_INTEGER -> + {t_integer, {LowerBound, UpperBound}}; + ?BEAM_TYPE_MAP -> + t_map; + ?BEAM_TYPE_NIL -> + nil; + ?BEAM_TYPE_NIL bor ?BEAM_TYPE_CONS -> + t_list; + ?BEAM_TYPE_PID -> + pid; + ?BEAM_TYPE_PORT -> + port; + ?BEAM_TYPE_REFERENCE -> + reference; + ?BEAM_TYPE_TUPLE -> + t_tuple; + _ -> + any + end, + parse_type_entries(Rest, [Type | Acc]). + +parse_extra(1, HasUpperBound, HasUnit, <>, '-inf', '+inf', 1) -> + parse_extra(0, HasUpperBound, HasUnit, Rest, Value, '+inf', 1); +parse_extra(0, 1, HasUnit, <>, LowerBound, '+inf', 1) -> + parse_extra(0, 0, HasUnit, Rest, LowerBound, Value, 1); +parse_extra(0, 0, 1, <>, LowerBound, UpperBound, 1) -> + parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Value + 1); +parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Unit) -> + {Rest, LowerBound, UpperBound, Unit}. 
diff --git a/src/libAtomVM/iff.c b/src/libAtomVM/iff.c index c09543e84..dea4e8089 100644 --- a/src/libAtomVM/iff.c +++ b/src/libAtomVM/iff.c @@ -100,6 +100,9 @@ void scan_iff(const void *iff_binary, int buf_size, unsigned long *offsets, unsi } else if (!memcmp(current_record->name, "avmN", 4)) { offsets[AVMN] = current_pos; sizes[AVMN] = ENDIAN_SWAP_32(current_record->size); + } else if (!memcmp(current_record->name, "Type", 4)) { + offsets[TYPE] = current_pos; + sizes[TYPE] = ENDIAN_SWAP_32(current_record->size); } current_pos += iff_align(ENDIAN_SWAP_32(current_record->size) + 8); diff --git a/src/libAtomVM/iff.h b/src/libAtomVM/iff.h index d29ee6c48..68d73c441 100644 --- a/src/libAtomVM/iff.h +++ b/src/libAtomVM/iff.h @@ -58,11 +58,13 @@ extern "C" { #define LINT 9 /** Native code section */ #define AVMN 10 +/** Type table section */ +#define TYPE 11 /** Required size for offsets array */ -#define MAX_OFFS 11 +#define MAX_OFFS 12 /** Required size for sizes array */ -#define MAX_SIZES 11 +#define MAX_SIZES 12 /** sizeof IFF section header in bytes */ #define IFF_SECTION_HEADER_SIZE 8 diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 0632fd608..18d987f5e 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -42,6 +42,29 @@ #include #endif +// BEAM Type constants from OTP source code: +// /opt/src/otp/lib/compiler/src/beam_types.erl lines 1446-1461 +#define BEAM_TYPE_ATOM (1 << 0) +#define BEAM_TYPE_BITSTRING (1 << 1) +#define BEAM_TYPE_CONS (1 << 2) +#define BEAM_TYPE_FLOAT (1 << 3) +#define BEAM_TYPE_FUN (1 << 4) +#define BEAM_TYPE_INTEGER (1 << 5) +#define BEAM_TYPE_MAP (1 << 6) +#define BEAM_TYPE_NIL (1 << 7) +#define BEAM_TYPE_PID (1 << 8) +#define BEAM_TYPE_PORT (1 << 9) +#define BEAM_TYPE_REFERENCE (1 << 10) +#define BEAM_TYPE_TUPLE (1 << 11) + +#define BEAM_TYPE_HAS_LOWER_BOUND (1 << 12) +#define BEAM_TYPE_HAS_UPPER_BOUND (1 << 13) +#define BEAM_TYPE_HAS_UNIT (1 << 14) + +// BEAM Types version from OTP source code: +// 
/opt/src/otp/lib/compiler/src/beam_types.hrl line 22 +#define BEAM_TYPES_VERSION 3 + #define LITT_UNCOMPRESSED_SIZE_OFFSET 8 #define LITT_HEADER_SIZE 12 @@ -377,6 +400,12 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary mod->free_literals_data = 0; } + if (offsets[TYPE]) { + mod->types_data = beam_file + offsets[TYPE] + IFF_SECTION_HEADER_SIZE; + } else { + mod->types_data = NULL; + } + #ifndef AVM_NO_JIT if (mod->native_code == NULL) { #endif @@ -514,6 +543,179 @@ term module_load_literal(Module *mod, int index, Context *ctx) return t; } +term module_get_type_by_index(const Module *mod, int type_index, Context *ctx) +{ + if (IS_NULL_PTR(mod->types_data)) { + // No Type chunk available, return 'any' + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); + } + + const uint8_t *types_data = (const uint8_t *) mod->types_data; + + // Parse Type chunk header: Version:32, Count:32 + uint32_t version = READ_32_UNALIGNED(types_data); + uint32_t count = READ_32_UNALIGNED(types_data + 4); + + // Check if version is supported + if (version != BEAM_TYPES_VERSION) { + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); + } + + // Check bounds + if (type_index >= (int) count) { + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); + } + + // Skip to type data + const uint8_t *type_entries = types_data + 8; + const uint8_t *pos = type_entries; + + // Skip to the requested type index + for (int i = 0; i < type_index; i++) { + uint16_t type_bits = READ_16_UNALIGNED(pos); + pos += 2; + + // Skip extra data if present + if (type_bits & BEAM_TYPE_HAS_LOWER_BOUND) pos += 8; + if (type_bits & BEAM_TYPE_HAS_UPPER_BOUND) pos += 8; + if (type_bits & BEAM_TYPE_HAS_UNIT) pos += 1; + } + + // Read the target type + uint16_t type_bits = READ_16_UNALIGNED(pos); + pos += 2; + + // Parse extra data for bounds and unit + int64_t lower_bound = INT64_MIN; + int64_t upper_bound = INT64_MAX; + uint8_t unit = 1; + bool 
has_lower = false; + bool has_upper = false; + + if (type_bits & BEAM_TYPE_HAS_LOWER_BOUND) { + lower_bound = (int64_t) READ_64_UNALIGNED(pos); + pos += 8; + has_lower = true; + } + if (type_bits & BEAM_TYPE_HAS_UPPER_BOUND) { + upper_bound = (int64_t) READ_64_UNALIGNED(pos); + pos += 8; + has_upper = true; + } + if (type_bits & BEAM_TYPE_HAS_UNIT) { + unit = *pos + 1; // Stored as unit-1 + pos += 1; + } + + // Decode type based on TypeBits (matching jit_precompile.erl exact pattern matching) + // From OTP source code: /opt/src/otp/lib/compiler/src/beam_types.erl decode_type function + uint16_t type_pattern = type_bits & 0xFFF; // Mask out flags, keep type bits + + switch (type_pattern) { + case BEAM_TYPE_ATOM: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x6", "t_atom")); + + case BEAM_TYPE_BITSTRING: + if (type_bits & BEAM_TYPE_HAS_UNIT) { + if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2)) != MEMORY_GC_OK)) { + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); + } + term type_tuple = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(type_tuple, 0, globalcontext_make_atom(ctx->global, ATOM_STR("\xD", "t_bs_matchable"))); + term_put_tuple_element(type_tuple, 1, term_from_int32(unit)); + return type_tuple; + } + return globalcontext_make_atom(ctx->global, ATOM_STR("\xD", "t_bs_matchable")); + + case BEAM_TYPE_CONS: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x6", "t_cons")); + + case BEAM_TYPE_FLOAT: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x7", "t_float")); + + case BEAM_TYPE_FUN: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x5", "t_fun")); + + case (BEAM_TYPE_FLOAT | BEAM_TYPE_INTEGER): + // {t_number, {LowerBound, UpperBound}} + if (has_lower || has_upper) { + if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2) + TUPLE_SIZE(2)) != MEMORY_GC_OK)) { + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); + } + term bounds_tuple = term_alloc_tuple(2, &ctx->heap); + + if 
(has_lower) { + term_put_tuple_element(bounds_tuple, 0, term_from_int64(lower_bound)); + } else { + term_put_tuple_element(bounds_tuple, 0, globalcontext_make_atom(ctx->global, ATOM_STR("\x4", "-inf"))); + } + + if (has_upper) { + term_put_tuple_element(bounds_tuple, 1, term_from_int64(upper_bound)); + } else { + term_put_tuple_element(bounds_tuple, 1, globalcontext_make_atom(ctx->global, ATOM_STR("\x4", "+inf"))); + } + + term type_tuple = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(type_tuple, 0, globalcontext_make_atom(ctx->global, ATOM_STR("\x8", "t_number"))); + term_put_tuple_element(type_tuple, 1, bounds_tuple); + return type_tuple; + } + return globalcontext_make_atom(ctx->global, ATOM_STR("\x8", "t_number")); + + case BEAM_TYPE_INTEGER: + if (has_lower || has_upper) { + if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2) + TUPLE_SIZE(2)) != MEMORY_GC_OK)) { + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); + } + term bounds_tuple = term_alloc_tuple(2, &ctx->heap); + + if (has_lower) { + term_put_tuple_element(bounds_tuple, 0, term_from_int64(lower_bound)); + } else { + term_put_tuple_element(bounds_tuple, 0, globalcontext_make_atom(ctx->global, ATOM_STR("\x4", "-inf"))); + } + + if (has_upper) { + term_put_tuple_element(bounds_tuple, 1, term_from_int64(upper_bound)); + } else { + term_put_tuple_element(bounds_tuple, 1, globalcontext_make_atom(ctx->global, ATOM_STR("\x4", "+inf"))); + } + + term type_tuple = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(type_tuple, 0, globalcontext_make_atom(ctx->global, ATOM_STR("\x9", "t_integer"))); + term_put_tuple_element(type_tuple, 1, bounds_tuple); + return type_tuple; + } + return globalcontext_make_atom(ctx->global, ATOM_STR("\x9", "t_integer")); + + case BEAM_TYPE_MAP: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x5", "t_map")); + + case BEAM_TYPE_NIL: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "nil")); + + case (BEAM_TYPE_NIL | 
BEAM_TYPE_CONS): + return globalcontext_make_atom(ctx->global, ATOM_STR("\x6", "t_list")); + + case BEAM_TYPE_PID: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "pid")); + + case BEAM_TYPE_PORT: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x4", "port")); + + case BEAM_TYPE_REFERENCE: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x9", "reference")); + + case BEAM_TYPE_TUPLE: + return globalcontext_make_atom(ctx->global, ATOM_STR("\x7", "t_tuple")); + + default: + // Default fallback for any other combination or union types + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); + } +} + #ifndef AVM_NO_JIT ModuleNativeEntryPoint module_get_native_entry_point(Module *module, int exported_label) { diff --git a/src/libAtomVM/module.h b/src/libAtomVM/module.h index d97e70817..6adf267cd 100644 --- a/src/libAtomVM/module.h +++ b/src/libAtomVM/module.h @@ -150,6 +150,8 @@ struct Module struct LiteralEntry *literals_table; + void *types_data; + atom_index_t *local_atoms_to_global_table; void *module_platform_data; @@ -261,6 +263,16 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary */ term module_load_literal(Module *mod, int index, Context *ctx); +/** + * @brief Gets type information for the given type index + * + * @details Loads and parses type information from the Type chunk and returns the type. + * @param mod The module that owns the type information. + * @param type_index a valid type index. + * @param ctx the target context. 
+ */ +term module_get_type_by_index(const Module *mod, int type_index, Context *ctx); + /** * @brief Gets a term for the given local atom id * diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index eb12f1cfd..66135d8cf 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -202,6 +202,7 @@ static term nif_code_server_resume(Context *ctx, int argc, term argv[]); static term nif_code_server_code_chunk(Context *ctx, int argc, term argv[]); static term nif_code_server_atom_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_literal_resolver(Context *ctx, int argc, term argv[]); +static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]); #endif static term nif_erlang_module_loaded(Context *ctx, int argc, term argv[]); @@ -768,6 +769,10 @@ static const struct Nif code_server_literal_resolver_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_code_server_literal_resolver }; +static const struct Nif code_server_type_resolver_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_code_server_type_resolver +}; static const struct Nif code_server_set_native_code_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_code_server_set_native_code @@ -5600,6 +5605,21 @@ static term nif_code_server_literal_resolver(Context *ctx, int argc, term argv[] return module_load_literal(mod, literal_index, ctx); } +static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + VALIDATE_VALUE(argv[0], term_is_atom); + VALIDATE_VALUE(argv[1], term_is_integer); + + term module_name = argv[0]; + Module *mod = globalcontext_get_module(ctx->global, term_to_atom_index(module_name)); + if (IS_NULL_PTR(mod)) { + RAISE_ERROR(BADARG_ATOM); + } + int type_index = term_to_int(argv[1]); + return module_get_type_by_index(mod, type_index, ctx); +} + static term nif_code_server_set_native_code(Context *ctx, int argc, term 
argv[]) { UNUSED(argc); diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf index 39cf39224..a647c1de0 100644 --- a/src/libAtomVM/nifs.gperf +++ b/src/libAtomVM/nifs.gperf @@ -182,6 +182,7 @@ code_server:resume/2, &code_server_resume_nif code_server:code_chunk/1, IF_HAVE_JIT(&code_server_code_chunk_nif) code_server:atom_resolver/2, IF_HAVE_JIT(&code_server_atom_resolver_nif) code_server:literal_resolver/2, IF_HAVE_JIT(&code_server_literal_resolver_nif) +code_server:type_resolver/2, IF_HAVE_JIT(&code_server_type_resolver_nif) code_server:set_native_code/3, IF_HAVE_JIT(&code_server_set_native_code_nif) console:print/1, &console_print_nif base64:encode/1, &base64_encode_nif diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index 6a4760c9b..7bb3df40d 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -574,6 +574,8 @@ compile_erlang(test_raw_raise) compile_erlang(test_ets) compile_erlang(test_node) +compile_erlang(test_code_server_nifs) + compile_erlang(test_op_bs_start_match) compile_assembler(test_op_bs_start_match_asm) compile_erlang(test_op_bs_create_bin) @@ -1101,12 +1103,13 @@ set(erlang_test_beams test_ets.beam test_node.beam - test_list_to_bitstring.beam test_lists_member.beam test_lists_keymember.beam test_lists_keyfind.beam + test_code_server_nifs.beam + test_op_bs_start_match.beam test_op_bs_create_bin.beam diff --git a/tests/erlang_tests/test_code_server_nifs.erl b/tests/erlang_tests/test_code_server_nifs.erl new file mode 100644 index 000000000..f1418e40a --- /dev/null +++ b/tests/erlang_tests/test_code_server_nifs.erl @@ -0,0 +1,129 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(test_code_server_nifs). + +-export([start/0, test_literals/0]). + +start() -> + case erlang:system_info(machine) of + "BEAM" -> + OTPRelease = erlang:system_info(otp_release), + if + OTPRelease >= "26" -> + ok = test_is_loaded(); + true -> + ok + end, + ok; + "ATOM" -> + ok = test_is_loaded(), + case erlang:system_info(emu_flavor) of + jit -> + ok = test_atom_resolver(), + ok = test_literal_resolver(), + ok = test_type_resolver(), + ok = test_error_cases(); + emu -> + ok + end + end, + 0. + +%% Test code_server:is_loaded/1 +test_is_loaded() -> + M = code_server:is_loaded(?MODULE), + % On BEAM, this undocumented function returns {file, _Path}, not true + true = M =/= false, + false = code_server:is_loaded(non_existent_module_12345), + ok. + +%% Test code_server:atom_resolver/2 +test_atom_resolver() -> + % The first atom (index 1) should be the module name + ModuleName = code_server:atom_resolver(?MODULE, 1), + true = is_atom(ModuleName), + ?MODULE = ModuleName, + + % Test some other atoms that should exist in this module + Atom2 = code_server:atom_resolver(?MODULE, 2), + true = is_atom(Atom2), + start = Atom2, + ok. + +%% Test code_server:literal_resolver/2 +test_literal_resolver() -> + try + Literal0 = code_server:literal_resolver(?MODULE, 0), + Literal1 = code_server:literal_resolver(?MODULE, 1), + true = Literal0 =/= Literal1, + ok + catch + % If there are no literals, that's also acceptable + error:badarg -> ok + end, + ok. 
+ +%% Test code_server:type_resolver/2 +test_type_resolver() -> + any = code_server:type_resolver(?MODULE, 0), + t_atom = code_server:type_resolver(?MODULE, 1), + true = test_type_resolver0(2), + ok. + +test_type_resolver0(N) -> + case code_server:type_resolver(?MODULE, N) of + any -> false; + % We know N >= 2 + {t_integer, {2, '+inf'}} -> true; + _Other -> test_type_resolver0(N + 1) + end. + +%% Test error cases +test_error_cases() -> + % Test with invalid module + try + code_server:atom_resolver(non_existent_module, 1), + error(should_have_failed) + catch + error:badarg -> ok + end, + + try + code_server:literal_resolver(non_existent_module, 0), + error(should_have_failed) + catch + error:badarg -> ok + end, + + try + code_server:type_resolver(non_existent_module, 0), + error(should_have_failed) + catch + error:badarg -> ok + end, + ok. + +%% Function with literals for testing +test_literals() -> + List = [1, 2, 3, atom, "string"], + Tuple = {hello, world, 42, 3.14159}, + Map = #{key => value, number => 123}, + {List, Tuple, Map}. diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index ea1837fde..b9522884a 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -32,6 +32,22 @@ 0, 2, 18, 66, 16, 1, 96, 64, 3, 19, 64, 18, 3, 78, 32, 16, 3>> ). +% Code chunk with typed register from test_term_to_int.erl +% Contains bs_get_binary2 opcode with typed register that uses term_to_int optimization +-define(CODE_CHUNK_1, + <<0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 182, 0, 0, 0, 4, 0, 0, 0, 1, 1, 16, 153, 16, 2, 18, 34, 32, + 1, 32, 45, 21, 19, 166, 53, 3, 32, 35, 117, 53, 87, 35, 16, 48, 87, 19, 32, 16, 0, 19, 182, + 53, 35, 23, 32, 50, 0, 64, 19, 3, 19, 1, 48, 153, 32, 72, 3, 3>> +). 
+-define(ATU8_CHUNK_1, + <<255, 255, 255, 253, 8, 16, 116, 101, 115, 116, 95, 116, 101, 114, 109, 95, 116, 111, 95, 105, + 110, 116, 144, 101, 120, 116, 114, 97, 99, 116, 95, 105, 224, 101, 110, 115, 117, 114, 101, + 95, 101, 120, 97, 99, 116, 108, 121>> +). +-define(TYPE_CHUNK_1, + <<0, 0, 0, 3, 0, 0, 0, 3, 15, 255, 0, 2, 0, 32>> +). + compile_minimal_x86_64_test() -> Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_0, @@ -40,7 +56,12 @@ compile_minimal_x86_64_test() -> ), Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), {_LabelsCount, Stream3} = jit:compile( - ?CODE_CHUNK_0, fun(_) -> undefined end, fun(_) -> undefined end, jit_x86_64, Stream2 + ?CODE_CHUNK_0, + fun(_) -> undefined end, + fun(_) -> undefined end, + fun(_) -> any end, + jit_x86_64, + Stream2 ), Stream4 = jit_x86_64:stream(Stream3), <<16:32, LabelsCount:32, ?JIT_FORMAT_VERSION:16, 1:16, ?JIT_ARCH_X86_64:16, ?JIT_VARIANT_PIC:16, @@ -69,3 +90,42 @@ check_labels_table0(_, <<>>) -> ok; check_labels_table0(N, <>) -> check_labels_table0(N + 1, Rest). check_lines_table(<>) -> ok. 
+
+% Test term_to_int optimization with typed registers using real BEAM code
+term_to_int_typed_optimization_x86_64_test() ->
+    % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization
+    Stream0 = jit_stream_binary:new(0),
+    <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_1,
+    Stream1 = jit_stream_binary:append(
+        Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC)
+    ),
+    Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
+
+    AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_1),
+    LiteralResolver = fun(_) -> test_literal end,
+    TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_1),
+
+    % Compile with typed register support
+    {_LabelsCount, Stream3} = jit:compile(
+        ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2
+    ),
+    CompiledCode = jit_x86_64:stream(Stream3),
+
+    % Check the reading of x[1] is immediately followed by a shift right.
+    % 15c: 4c 8b 5f 38             mov    0x38(%rdi),%r11
+    % 160: 49 c1 eb 04             shr    $0x4,%r11
+
+    % As opposed to testing its type
+    % 15c: 4c 8b 5f 38             mov    0x38(%rdi),%r11
+    % 160: 4d 89 da                mov    %r11,%r10
+    % 163: 41 80 e2 0f             and    $0xf,%r10b
+    % 167: 41 80 fa 0f             cmp    $0xf,%r10b
+    % 16b: 74 05                   je     0x172
+    % 16d: e9 ab 00 00 00          jmpq   0x21d
+    % 172: 49 c1 eb 04             shr    $0x4,%r11
+    ?assertMatch(
+        {_, 8},
+        binary:match(CompiledCode, <<16#4c, 16#8b, 16#5f, 16#38, 16#49, 16#c1, 16#eb, 16#04>>)
+    ),
+
+    ok.
diff --git a/tests/test.c b/tests/test.c index e01bce458..51977c1d2 100644 --- a/tests/test.c +++ b/tests/test.c @@ -555,6 +555,8 @@ struct Test tests[] = { TEST_CASE(test_op_bs_start_match), TEST_CASE(test_op_bs_create_bin), + TEST_CASE(test_code_server_nifs), + // noisy tests, keep them at the end TEST_CASE_EXPECTED(spawn_opt_monitor_normal, 1), TEST_CASE_EXPECTED(spawn_opt_link_normal, 1), diff --git a/tools/packbeam/packbeam.c b/tools/packbeam/packbeam.c index 5191fdd72..63f1f726d 100644 --- a/tools/packbeam/packbeam.c +++ b/tools/packbeam/packbeam.c @@ -341,6 +341,10 @@ static void pack_beam_file(FILE *pack, const uint8_t *data, size_t size, const c assert_fwrite(data + offsets[AVMN], sizes[AVMN] + IFF_SECTION_HEADER_SIZE, pack); pad_and_align(pack); } + if (offsets[TYPE]) { + assert_fwrite(data + offsets[TYPE], sizes[TYPE] + IFF_SECTION_HEADER_SIZE, pack); + pad_and_align(pack); + } if (offsets[LINT] && include_lines) { assert_fwrite(data + offsets[LINT], sizes[LINT] + IFF_SECTION_HEADER_SIZE, pack); pad_and_align(pack); From 5bf8793fc6e8881564f72e504e520c23ecaab2a3 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 23 Sep 2025 21:17:49 +0200 Subject: [PATCH 03/46] JIT: optimize verify_is_match_state_and_get_ptr using types Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 245 ++++++++++++++++++----------------- src/libAtomVM/module.c | 15 +-- tests/libs/jit/jit_tests.erl | 42 +++++- 3 files changed, 170 insertions(+), 132 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 8dc76772b..10a41cd17 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -1176,21 +1176,20 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), - {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), + {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), {_Live, Rest3} = decode_literal(Rest2), {MSt2, Size, Rest4} = 
decode_typed_compact_term(Rest3, MMod, MSt1, State0), {Unit, Rest5} = decode_literal(Rest4), {FlagsValue, Rest6} = decode_literal(Rest5), - {MSt3, SrcReg} = MMod:move_to_native_register(MSt2, Src), - {MSt4, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt3, {free, SrcReg}), - {MSt5, SizeReg} = term_to_int(Size, Fail, MMod, MSt4), + {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), + {MSt4, SizeReg} = term_to_int(Size, Fail, MMod, MSt3), {MSt6, NumBits} = if is_integer(SizeReg) -> - {MSt5, SizeReg * Unit}; + {MSt4, SizeReg * Unit}; true -> - MSt5M = MMod:mul(MSt5, SizeReg, Unit), - {MSt5M, SizeReg} + MSt5 = MMod:mul(MSt4, SizeReg, Unit), + {MSt5, SizeReg} end, {MSt7, BSBinaryReg} = MMod:get_array_element(MSt6, MatchStateRegPtr, 1), {MSt8, BSOffsetReg} = MMod:get_array_element(MSt7, MatchStateRegPtr, 2), @@ -1215,21 +1214,20 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), - {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), + {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), {_Live, Rest3} = decode_literal(Rest2), {MSt2, Size, Rest4} = decode_typed_compact_term(Rest3, MMod, MSt1, State0), {Unit, Rest5} = decode_literal(Rest4), {FlagsValue, Rest6} = decode_literal(Rest5), - {MSt3, SrcReg} = MMod:move_to_native_register(MSt2, Src), - {MSt4, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt3, {free, SrcReg}), - {MSt5, SizeReg} = term_to_int(Size, Fail, MMod, MSt4), + {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), + {MSt4, SizeReg} = term_to_int(Size, Fail, MMod, MSt3), {MSt6, NumBits} = if is_integer(SizeReg) -> - {MSt5, SizeReg * Unit}; + {MSt4, SizeReg * Unit}; true -> - MSt5M = MMod:mul(MSt5, SizeReg, Unit), - {MSt5M, SizeReg} + MSt5 = MMod:mul(MSt4, SizeReg, Unit), + {MSt5, SizeReg} end, {MSt7, BSBinaryReg} = MMod:get_array_element(MSt6, 
MatchStateRegPtr, 1), {MSt8, BSOffsetReg} = MMod:get_array_element(MSt7, MatchStateRegPtr, 2), @@ -1253,49 +1251,48 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), - {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), + {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), {Live, Rest3} = decode_literal(Rest2), {MSt2, Size, Rest4} = decode_compact_term(Rest3, MMod, MSt1, State0), {Unit, Rest5} = decode_literal(Rest4), {FlagsValue, Rest6} = decode_literal(Rest5), - {MSt3, SrcReg} = MMod:move_to_native_register(MSt2, Src), - {MSt4, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt3, {free, SrcReg}), - {MSt5, BSBinaryReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 1), - {MSt6, BSOffsetReg} = MMod:get_array_element(MSt5, MatchStateRegPtr, 2), - MSt7 = + {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), + {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), + {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), + MSt6 = if Unit =/= 8 -> - MMod:call_primitive_last(MSt6, ?PRIM_RAISE_ERROR, [ + MMod:call_primitive_last(MSt5, ?PRIM_RAISE_ERROR, [ ctx, jit_state, offset, ?UNSUPPORTED_ATOM ]); FlagsValue =/= 0 -> - MMod:call_primitive_last(MSt6, ?PRIM_RAISE_ERROR, [ + MMod:call_primitive_last(MSt5, ?PRIM_RAISE_ERROR, [ ctx, jit_state, offset, ?UNSUPPORTED_ATOM ]); true -> - MSt6 + MSt5 end, - MSt8 = MMod:if_block(MSt7, {BSOffsetReg, '&', 16#7, '!=', 0}, fun(BlockSt) -> + MSt7 = MMod:if_block(MSt6, {BSOffsetReg, '&', 16#7, '!=', 0}, fun(BlockSt) -> MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ctx, jit_state, offset, ?BADARG_ATOM]) end), - MSt9 = MMod:shift_right(MSt8, BSOffsetReg, 3), - MSt10 = MMod:and_(MSt9, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt11, SizeReg} = MMod:get_array_element(MSt10, BSBinaryReg, 1), - {MSt14, SizeValue} = + MSt8 
= MMod:shift_right(MSt7, BSOffsetReg, 3), + MSt9 = MMod:and_(MSt8, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt10, SizeReg} = MMod:get_array_element(MSt9, BSBinaryReg, 1), + {MSt13, SizeValue} = if Size =:= ?ALL_ATOM -> - MSt12 = MMod:sub(MSt11, SizeReg, BSOffsetReg), - {MSt12, SizeReg}; + MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg), + {MSt11, SizeReg}; is_integer(Size) -> % SizeReg is binary size % SizeVal is a constant - MSt12 = MMod:sub(MSt11, SizeReg, Size bsl 4), - MSt13 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg}, Fail, MMod, MSt12), - {MSt13, Size bsl 4}; + MSt11 = MMod:sub(MSt10, SizeReg, Size bsl 4), + MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg}, Fail, MMod, MSt11), + {MSt12, Size bsl 4}; true -> - {MSt12, SizeValReg} = MMod:move_to_native_register(MSt11, Size), - MSt13 = MMod:if_else_block( - MSt12, + {MSt11, SizeValReg} = MMod:move_to_native_register(MSt10, Size), + MSt12 = MMod:if_else_block( + MSt11, {SizeValReg, '==', ?ALL_ATOM}, fun(BSt0) -> BSt1 = MMod:sub(BSt0, SizeReg, BSOffsetReg), @@ -1309,53 +1306,52 @@ first_pass(<>, MMod, MSt0, State0) -> MMod:free_native_registers(BSt4, [SizeValReg]) end ), - {MSt13, SizeReg} + {MSt12, SizeReg} end, - {MSt15, NewOffsetReg} = MMod:copy_to_native_register(MSt14, BSOffsetReg), - MSt16 = MMod:add(MSt15, NewOffsetReg, SizeValue), - MSt17 = MMod:shift_left(MSt16, NewOffsetReg, 3), + {MSt14, NewOffsetReg} = MMod:copy_to_native_register(MSt13, BSOffsetReg), + MSt15 = MMod:add(MSt14, NewOffsetReg, SizeValue), + MSt16 = MMod:shift_left(MSt15, NewOffsetReg, 3), % Write new offset - MSt18 = MMod:move_to_array_element(MSt17, NewOffsetReg, MatchStateRegPtr, 2), - MSt19 = MMod:free_native_registers(MSt18, [NewOffsetReg, MatchStateRegPtr]), - {MSt20, TrimResultReg} = MMod:call_primitive(MSt19, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), - MSt21 = MMod:free_native_registers(MSt20, [TrimResultReg]), - {MSt22, HeapSizeReg} = MMod:call_primitive(MSt21, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ + MSt17 = 
MMod:move_to_array_element(MSt16, NewOffsetReg, MatchStateRegPtr, 2), + MSt18 = MMod:free_native_registers(MSt17, [NewOffsetReg, MatchStateRegPtr]), + {MSt19, TrimResultReg} = MMod:call_primitive(MSt18, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), + MSt20 = MMod:free_native_registers(MSt19, [TrimResultReg]), + {MSt21, HeapSizeReg} = MMod:call_primitive(MSt20, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ BSBinaryReg, SizeValue ]), - MSt23 = MMod:or_(MSt22, BSBinaryReg, ?TERM_PRIMARY_BOXED), - {MSt24, NewBSBinaryReg} = memory_ensure_free_with_extra_root( - BSBinaryReg, Live, {free, HeapSizeReg}, MMod, MSt23 + MSt22 = MMod:or_(MSt21, BSBinaryReg, ?TERM_PRIMARY_BOXED), + {MSt23, NewBSBinaryReg} = memory_ensure_free_with_extra_root( + BSBinaryReg, Live, {free, HeapSizeReg}, MMod, MSt22 ), - {MSt25, ResultTerm} = MMod:call_primitive(MSt24, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ + {MSt24, ResultTerm} = MMod:call_primitive(MSt23, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ ctx, {free, NewBSBinaryReg}, {free, BSOffsetReg}, {free, SizeValue} ]), - {MSt26, Dest, Rest7} = decode_dest(Rest6, MMod, MSt25), + {MSt25, Dest, Rest7} = decode_dest(Rest6, MMod, MSt24), ?TRACE("OP_BS_GET_BINARY2 ~p,~p,~p,~p,~p,~p,~p\n", [ Fail, Src, Live, Size, Unit, FlagsValue, Dest ]), - MSt27 = MMod:move_to_vm_register(MSt26, ResultTerm, Dest), - MSt28 = MMod:free_native_registers(MSt27, [ResultTerm, Dest]), - ?ASSERT_ALL_NATIVE_FREE(MSt28), - first_pass(Rest7, MMod, MSt28, State0); + MSt26 = MMod:move_to_vm_register(MSt25, ResultTerm, Dest), + MSt27 = MMod:free_native_registers(MSt26, [ResultTerm]), + ?ASSERT_ALL_NATIVE_FREE(MSt27), + first_pass(Rest7, MMod, MSt27, State0); % 120 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), - {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), + {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), {MSt2, Size, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0), {Unit, Rest4} = 
decode_literal(Rest3), {_FlagsValue, Rest5} = decode_literal(Rest4), ?TRACE("OP_BS_SKIP_BITS2 ~p, ~p, ~p, ~p, ~p\n", [Fail, Src, Size, Unit, _FlagsValue]), - {MSt3, SrcReg} = MMod:move_to_native_register(MSt2, Src), - {MSt4, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt3, {free, SrcReg}), - {MSt5, SizeReg} = term_to_int(Size, Fail, MMod, MSt4), + {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), + {MSt4, SizeReg} = term_to_int(Size, Fail, MMod, MSt3), {MSt6, NumBits} = if is_integer(SizeReg) -> - {MSt5, SizeReg * Unit}; + {MSt4, SizeReg * Unit}; true -> - MSt5M = MMod:mul(MSt5, SizeReg, Unit), - {MSt5M, SizeReg} + MSt5 = MMod:mul(MSt4, SizeReg, Unit), + {MSt5, SizeReg} end, {MSt7, BSBinaryReg} = MMod:get_array_element(MSt6, MatchStateRegPtr, 1), {MSt8, BSOffsetReg} = MMod:get_array_element(MSt7, MatchStateRegPtr, 2), @@ -1372,21 +1368,20 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), - {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), + {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), {Bits, Rest3} = decode_literal(Rest2), ?TRACE("OP_BS_TEST_TAIL2 ~p, ~p, ~p\n", [Fail, Src, Bits]), - {MSt2, SrcReg} = MMod:move_to_native_register(MSt1, Src), - {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, {free, SrcReg}), - {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), - {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), - MSt6 = MMod:free_native_registers(MSt5, [MatchStateRegPtr]), - MSt7 = MMod:add(MSt6, BSOffsetReg, Bits), - {MSt8, BSBinarySize} = term_binary_size({free, BSBinaryReg}, MMod, MSt7), - MSt9 = MMod:shift_left(MSt8, BSBinarySize, 3), - MSt10 = cond_jump_to_label({{free, BSBinarySize}, '!=', BSOffsetReg}, Fail, MMod, MSt9), - MSt11 = MMod:free_native_registers(MSt10, [BSOffsetReg]), - 
?ASSERT_ALL_NATIVE_FREE(MSt11), - first_pass(Rest3, MMod, MSt11, State0); + {MSt2, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt1, Src), + {MSt3, BSBinaryReg} = MMod:get_array_element(MSt2, MatchStateRegPtr, 1), + {MSt4, BSOffsetReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 2), + MSt5 = MMod:free_native_registers(MSt4, [MatchStateRegPtr]), + MSt6 = MMod:add(MSt5, BSOffsetReg, Bits), + {MSt7, BSBinarySize} = term_binary_size({free, BSBinaryReg}, MMod, MSt6), + MSt8 = MMod:shift_left(MSt7, BSBinarySize, 3), + MSt9 = cond_jump_to_label({{free, BSBinarySize}, '!=', BSOffsetReg}, Fail, MMod, MSt8), + MSt10 = MMod:free_native_registers(MSt9, [BSOffsetReg]), + ?ASSERT_ALL_NATIVE_FREE(MSt10), + first_pass(Rest3, MMod, MSt10, State0); % 124 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -1468,23 +1463,22 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), - {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), + {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), {Bits, Rest3} = decode_literal(Rest2), {Offset, Rest4} = decode_literal(Rest3), ?TRACE("OP_BS_MATCH_STRING ~p,~p,~p,~p\n", [Fail, Src, Bits, Offset]), - {MSt2, SrcReg} = MMod:move_to_native_register(MSt1, Src), - {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, {free, SrcReg}), - {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), - {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), - {MSt6, MatchResult} = MMod:call_primitive(MSt5, ?PRIM_BITSTRING_MATCH_MODULE_STR, [ + {MSt2, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt1, Src), + {MSt3, BSBinaryReg} = MMod:get_array_element(MSt2, MatchStateRegPtr, 1), + {MSt4, BSOffsetReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 2), + {MSt5, MatchResult} = MMod:call_primitive(MSt4, 
?PRIM_BITSTRING_MATCH_MODULE_STR, [ ctx, jit_state, {free, BSBinaryReg}, BSOffsetReg, Offset, Bits ]), - MSt7 = cond_jump_to_label({'(bool)', {free, MatchResult}, '==', false}, Fail, MMod, MSt6), - MSt8 = MMod:add(MSt7, BSOffsetReg, Bits), - MSt9 = MMod:move_to_array_element(MSt8, BSOffsetReg, MatchStateRegPtr, 2), - MSt10 = MMod:free_native_registers(MSt9, [BSOffsetReg, MatchStateRegPtr]), - ?ASSERT_ALL_NATIVE_FREE(MSt10), - first_pass(Rest4, MMod, MSt10, State0); + MSt6 = cond_jump_to_label({'(bool)', {free, MatchResult}, '==', false}, Fail, MMod, MSt5), + MSt7 = MMod:add(MSt6, BSOffsetReg, Bits), + MSt8 = MMod:move_to_array_element(MSt7, BSOffsetReg, MatchStateRegPtr, 2), + MSt9 = MMod:free_native_registers(MSt8, [BSOffsetReg, MatchStateRegPtr]), + ?ASSERT_ALL_NATIVE_FREE(MSt9), + first_pass(Rest4, MMod, MSt9, State0); % 133 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -2019,29 +2013,26 @@ first_pass(<>, MMod, MSt0, State0) -> % 165 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), - {MSt1, Src, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), + {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1), {Live, Rest3} = decode_literal(Rest2), ?TRACE("OP_BS_GET_TAIL ~p, ~p, ~p\n", [Src, Dest, Live]), - {MSt3, MatchStateReg0} = MMod:move_to_native_register(MSt2, Src), - {MSt4, MatchStateRegPtr} = verify_is_match_state_and_get_ptr( - MMod, MSt3, {free, MatchStateReg0} - ), - {MSt5, BSBinaryReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 1), - {MSt6, BSOffsetReg} = MMod:get_array_element(MSt5, MatchStateRegPtr, 2), - MSt7 = MMod:free_native_registers(MSt6, [MatchStateRegPtr]), - MSt8 = MMod:and_(MSt7, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt9, ResultTerm, NewMatchState} = do_get_tail( - Src, Live, BSOffsetReg, BSBinaryReg, MMod, MSt8 - ), - MSt10 = MMod:free_native_registers(MSt9, [BSBinaryReg]), - {MSt11, MatchStateReg1} = 
MMod:move_to_native_register(MSt10, NewMatchState), - MSt12 = MMod:and_(MSt11, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK), - MSt13 = MMod:move_to_array_element(MSt12, BSOffsetReg, MatchStateReg1, 2), - MSt14 = MMod:move_to_vm_register(MSt13, ResultTerm, Dest), - MSt15 = MMod:free_native_registers(MSt14, [MatchStateReg1, BSOffsetReg, ResultTerm, Dest]), - ?ASSERT_ALL_NATIVE_FREE(MSt15), - first_pass(Rest3, MMod, MSt15, State0); + {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), + {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), + {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), + MSt6 = MMod:free_native_registers(MSt5, [MatchStateRegPtr]), + MSt7 = MMod:and_(MSt6, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt8, ResultTerm, NewMatchState} = do_get_tail( + Src, Live, BSOffsetReg, BSBinaryReg, MMod, MSt7 + ), + MSt9 = MMod:free_native_registers(MSt8, [BSBinaryReg]), + {MSt10, MatchStateReg1} = MMod:move_to_native_register(MSt9, NewMatchState), + MSt11 = MMod:and_(MSt10, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK), + MSt12 = MMod:move_to_array_element(MSt11, BSOffsetReg, MatchStateReg1, 2), + MSt13 = MMod:move_to_vm_register(MSt12, ResultTerm, Dest), + MSt14 = MMod:free_native_registers(MSt13, [MatchStateReg1, BSOffsetReg, ResultTerm, Dest]), + ?ASSERT_ALL_NATIVE_FREE(MSt14), + first_pass(Rest3, MMod, MSt14, State0); % 166 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -2074,16 +2065,15 @@ first_pass(<>, MMod, MSt0, State0) -> % 168 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), - {MSt1, Src, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), + {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Pos, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt1, State0), ?TRACE("OP_BS_SET_POSITION ~p, ~p\n", [Src, Pos]), - {MSt3, MatchStateReg} = MMod:move_to_native_register(MSt2, Src), - {MSt4, MatchStateRegPtr} = 
verify_is_match_state_and_get_ptr(MMod, MSt3, {free, MatchStateReg}), - {MSt5, PosVal} = term_to_int(Pos, 0, MMod, MSt4), - MSt6 = MMod:move_to_array_element(MSt5, PosVal, MatchStateRegPtr, 2), - MSt7 = MMod:free_native_registers(MSt6, [PosVal, MatchStateRegPtr]), - ?ASSERT_ALL_NATIVE_FREE(MSt7), - first_pass(Rest2, MMod, MSt7, State0); + {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), + {MSt4, PosVal} = term_to_int(Pos, 0, MMod, MSt3), + MSt5 = MMod:move_to_array_element(MSt4, PosVal, MatchStateRegPtr, 2), + MSt6 = MMod:free_native_registers(MSt5, [PosVal, MatchStateRegPtr]), + ?ASSERT_ALL_NATIVE_FREE(MSt6), + first_pass(Rest2, MMod, MSt6, State0); % 169 first_pass(<>, MMod, MSt0, State) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -3066,14 +3056,27 @@ verify_is_boxed(MMod, MSt0, Reg, FailLabel) -> %% @doc verify_match_state and return the term_ptr for Reg. %% Actually, this means Reg isn't restored with OR ?TERM_PRIMARY_BOXED -verify_is_match_state_and_get_ptr(MMod, MSt0, {free, Reg}) -> - MSt1 = verify_is_boxed(MMod, MSt0, Reg), - MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt3, BoxTag} = MMod:get_array_element(MSt2, Reg, 0), - MSt4 = cond_raise_badarg( - {{free, BoxTag}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_BIN_MATCH_STATE}, MMod, MSt3 +verify_is_match_state_and_get_ptr(MMod, MSt0, {typed, Src, {t_bs_matchable, _Unit}}) -> + %% If Src is of type t_bs_matchable, it means it's boxed but we need to check + %% if it is a bin_match_state (OTP27 type had a bs_context type but it's + %% gone with OTP28) + {MSt1, SrcReg} = MMod:move_to_native_register(MSt0, Src), + verify_is_match_state_and_get_ptr0(MMod, MSt1, SrcReg); +verify_is_match_state_and_get_ptr(MMod, MSt0, {typed, Src, _}) -> + verify_is_match_state_and_get_ptr(MMod, MSt0, Src); +verify_is_match_state_and_get_ptr(MMod, MSt0, Src) -> + % Default case is to check it's boxed + {MSt1, Reg} = MMod:move_to_native_register(MSt0, Src), + MSt2 = verify_is_boxed(MMod, MSt1, 
Reg), + verify_is_match_state_and_get_ptr0(MMod, MSt2, Reg). + +verify_is_match_state_and_get_ptr0(MMod, MSt0, Reg) -> + MSt1 = MMod:and_(MSt0, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, BoxTag} = MMod:get_array_element(MSt1, Reg, 0), + MSt3 = cond_raise_badarg( + {{free, BoxTag}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_BIN_MATCH_STATE}, MMod, MSt2 ), - {MSt4, Reg}. + {MSt3, Reg}. verify_is_immediate(Arg1, ImmediateTag, FailLabel, MMod, MSt0) -> verify_is_immediate(Arg1, ?TERM_IMMED_TAG_MASK, ImmediateTag, FailLabel, MMod, MSt0). diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 18d987f5e..a5f5c907e 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -616,16 +616,13 @@ term module_get_type_by_index(const Module *mod, int type_index, Context *ctx) return globalcontext_make_atom(ctx->global, ATOM_STR("\x6", "t_atom")); case BEAM_TYPE_BITSTRING: - if (type_bits & BEAM_TYPE_HAS_UNIT) { - if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2)) != MEMORY_GC_OK)) { - return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); - } - term type_tuple = term_alloc_tuple(2, &ctx->heap); - term_put_tuple_element(type_tuple, 0, globalcontext_make_atom(ctx->global, ATOM_STR("\xD", "t_bs_matchable"))); - term_put_tuple_element(type_tuple, 1, term_from_int32(unit)); - return type_tuple; + if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2)) != MEMORY_GC_OK)) { + return globalcontext_make_atom(ctx->global, ATOM_STR("\x3", "any")); } - return globalcontext_make_atom(ctx->global, ATOM_STR("\xD", "t_bs_matchable")); + term type_tuple = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(type_tuple, 0, globalcontext_make_atom(ctx->global, ATOM_STR("\xE", "t_bs_matchable"))); + term_put_tuple_element(type_tuple, 1, term_from_int32(unit)); + return type_tuple; case BEAM_TYPE_CONS: return globalcontext_make_atom(ctx->global, ATOM_STR("\x6", "t_cons")); diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 
b9522884a..37570d144 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -91,8 +91,7 @@ check_labels_table0(N, <>) -> check_labels_table0 check_lines_table(<>) -> ok. -% Test term_to_int optimization with typed registers using real BEAM code -term_to_int_typed_optimization_x86_64_test() -> +term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_1, @@ -128,4 +127,43 @@ term_to_int_typed_optimization_x86_64_test() -> binary:match(CompiledCode, <<16#4c, 16#8b, 16#5f, 16#38, 16#49, 16#c1, 16#eb, 16#04>>) ), + % Check call to bs_start_match3 is followed by a skip of verify_is_boxed + % 100: 48 8b 77 30 mov 0x30(%rdi),%rsi + % 104: 48 c7 c2 00 00 00 00 mov $0x0,%rdx + % 10b: ff d0 callq *%rax + % 10d: 5a pop %rdx + % 10e: 5e pop %rsi + % 10f: 5f pop %rdi + % 110: 48 89 47 40 mov %rax,0x40(%rdi) + % 114: 48 8b 47 40 mov 0x40(%rdi),%rax + % 118: 48 83 e0 fc and $0xfffffffffffffffc,%rax + + % As opposed to: + % 100: 48 8b 77 30 mov 0x30(%rdi),%rsi + % 104: 48 c7 c2 00 00 00 00 mov $0x0,%rdx + % 10b: ff d0 callq *%rax + % 10d: 5a pop %rdx + % 10e: 5e pop %rsi + % 10f: 5f pop %rdi + % 110: 48 89 47 40 mov %rax,0x40(%rdi) + % 114: 48 8b 47 40 mov 0x40(%rdi),%rax + % 118: 49 89 c3 mov %rax,%r11 + % 11b: 41 80 e3 03 and $0x3,%r11b + % 11f: 41 80 fb 02 cmp $0x2,%r11b + % 123: 74 13 je 0x138 + % 125: 48 8b 02 mov (%rdx),%rax + % 128: 48 c7 c2 28 01 00 00 mov $0x128,%rdx + % 12f: 48 c7 c1 0b 01 00 00 mov $0x10b,%rcx + % 136: ff e0 jmpq *%rax + % 138: 48 83 e0 fc and $0xfffffffffffffffc,%rax + ?assertMatch( + {_, 28}, + binary:match( + CompiledCode, + <<16#48, 16#8b, 16#77, 16#30, 16#48, 16#c7, 16#c2, 16#00, 16#00, 16#00, 16#00, 16#ff, + 16#d0, 16#5a, 16#5e, 16#5f, 16#48, 16#89, 16#47, 16#40, 16#48, 16#8b, 16#47, 16#40, + 
16#48, 16#83, 16#e0, 16#fc>> + ) + ), + ok. From ac6b947d5f75402e408df8162b40d48cacec1750 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 23 Sep 2025 22:00:57 +0200 Subject: [PATCH 04/46] JIT: optimize verify_is_function using types Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 41 +++++++++++------- tests/libs/jit/jit_tests.erl | 83 ++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 16 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 10a41cd17..4b77c284a 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -949,13 +949,12 @@ first_pass(<>, MMod, MSt0, State0) -> ?TRACE("OP_CALL_FUN ~p\n", [ArgsCount]), MSt1 = MMod:decrement_reductions_and_maybe_schedule_next(MSt0), {MSt2, FuncReg} = read_any_xreg(ArgsCount, MMod, MSt1), - {MSt3, Reg} = MMod:move_to_native_register(MSt2, FuncReg), - MSt4 = verify_is_function(Reg, MMod, MSt3), - MSt5 = MMod:call_primitive_with_cp(MSt4, ?PRIM_CALL_FUN, [ - ctx, jit_state, offset, Reg, ArgsCount + {MSt3, Reg} = verify_is_function(FuncReg, MMod, MSt2), + MSt4 = MMod:call_primitive_with_cp(MSt3, ?PRIM_CALL_FUN, [ + ctx, jit_state, offset, {free, Reg}, ArgsCount ]), - ?ASSERT_ALL_NATIVE_FREE(MSt5), - first_pass(Rest1, MMod, MSt5, State0); + ?ASSERT_ALL_NATIVE_FREE(MSt4), + first_pass(Rest1, MMod, MSt4, State0); % 77 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -2322,18 +2321,17 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Tag, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {ArgsCount, Rest2} = decode_literal(Rest1), - {MSt2, Fun, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0), + {MSt2, Fun, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0), ?TRACE("OP_CALL_FUN2 ~p, ~p, ~p\n", [Tag, ArgsCount, Fun]), % We ignore Tag (could be literal 0 or atom unsafe) MSt3 = MMod:free_native_registers(MSt2, [Tag]), MSt4 = MMod:decrement_reductions_and_maybe_schedule_next(MSt3), - {MSt5, Reg} = 
MMod:move_to_native_register(MSt4, Fun), - MSt6 = verify_is_function(Reg, MMod, MSt5), - MSt7 = MMod:call_primitive_with_cp(MSt6, ?PRIM_CALL_FUN, [ - ctx, jit_state, offset, Reg, ArgsCount + {MSt5, Reg} = verify_is_function(Fun, MMod, MSt4), + MSt6 = MMod:call_primitive_with_cp(MSt5, ?PRIM_CALL_FUN, [ + ctx, jit_state, offset, {free, Reg}, ArgsCount ]), - ?ASSERT_ALL_NATIVE_FREE(MSt7), - first_pass(Rest3, MMod, MSt7, State0); + ?ASSERT_ALL_NATIVE_FREE(MSt6), + first_pass(Rest3, MMod, MSt6, State0); % 180 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -2995,8 +2993,18 @@ term_is_boxed_with_tag_and_get_ptr(Label, Arg1, BoxedTag, MMod, MSt1) -> %% @param MSt0 backend state %% @return new backend state %%----------------------------------------------------------------------------- -verify_is_function(Arg, MMod, MSt0) -> - {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Arg), +verify_is_function({typed, Func, t_fun}, MMod, MSt0) -> + MMod:move_to_native_register(MSt0, Func); +verify_is_function({typed, Func, any}, MMod, MSt0) -> + verify_is_function(Func, MMod, MSt0); +verify_is_function({typed, Func, _Other}, MMod, MSt0) -> + {MSt1, Reg} = MMod:move_to_native_register(MSt0, Func), + MSt2 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, Reg + ]), + {MSt2, Reg}; +verify_is_function(Func, MMod, MSt0) -> + {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Func), MSt2 = MMod:if_block(MSt1, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ ctx, jit_state, offset, ?BADFUN_ATOM, Reg @@ -3009,7 +3017,8 @@ verify_is_function(Arg, MMod, MSt0) -> ctx, jit_state, offset, ?BADFUN_ATOM, Reg ]) end), - MMod:free_native_registers(MSt5, [Reg]). + MSt6 = MMod:free_native_registers(MSt5, [Reg]), + MMod:move_to_native_register(MSt6, Func). 
verify_is_binary_or_match_state(Label, Src, MMod, MSt0) -> {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Src), diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 37570d144..72a356ae3 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -48,6 +48,20 @@ <<0, 0, 0, 3, 0, 0, 0, 3, 15, 255, 0, 2, 0, 32>> ). +% Code chunk with typed register from test_call_simple.erl +% Contains call_fun2 opcode with typed register that uses verify_is_function optimization +-define(CODE_CHUNK_2, + <<0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 178, 0, 0, 0, 3, 0, 0, 0, 1, 1, 16, 153, 16, 2, 18, 34, 32, + 1, 32, 77, 21, 19, 12, 0, 32, 153, 32, 178, 50, 16, 87, 19, 16, 18, 0, 19, 3>> +). +-define(ATU8_CHUNK_2, + <<255, 255, 255, 253, 8, 16, 116, 101, 115, 116, 95, 99, 97, 108, 108, 95, 115, 105, 109, 112, + 108, 101, 144, 116, 101, 115, 116, 95, 99, 97, 108, 108, 96, 117, 110, 115, 97, 102, 101>> +). +-define(TYPE_CHUNK_2, + <<0, 0, 0, 3, 0, 0, 0, 2, 15, 255, 0, 16>> +). + compile_minimal_x86_64_test() -> Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_0, @@ -167,3 +181,72 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> ), ok. 
+
+verify_is_function_typed_optimization_x86_64_test() ->
+    % Compile CODE_CHUNK_2 which contains a typed register for the verify_is_function optimization
+    Stream0 = jit_stream_binary:new(0),
+    <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_2,
+    Stream1 = jit_stream_binary:append(
+        Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC)
+    ),
+    Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
+
+    AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_2),
+    LiteralResolver = fun(_) -> test_literal end,
+    TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_2),
+
+    % Compile with typed register support
+    {_LabelsCount, Stream3} = jit:compile(
+        ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2
+    ),
+    CompiledCode = jit_x86_64:stream(Stream3),
+
+    % Check that the call to allocate is directly followed by building the cp
+    % for call
+    % b6: 48 8b 42 10             mov    0x10(%rdx),%rax
+    % ba: ff e0                   jmpq   *%rax
+    % bc: 48 8b 47 38             mov    0x38(%rdi),%rax
+    % c0: 4c 8b 1e                mov    (%rsi),%r11
+    % c3: 45 8b 1b                mov    (%r11),%r11d
+    % c6: 49 c1 e3 18             shl    $0x18,%r11
+    % ...
+ + % As opposed to: + % b6: 48 8b 42 10 mov 0x10(%rdx),%rax + % ba: ff e0 jmpq *%rax + % bc: 48 8b 47 38 mov 0x38(%rdi),%rax + % c0: 49 89 c3 mov %rax,%r11 + % c3: 4d 89 da mov %r11,%r10 + % c6: 41 80 e2 03 and $0x3,%r10b + % ca: 41 80 fa 02 cmp $0x2,%r10b + % ce: 74 1a je 0xea + % d0: 48 8b 82 98 00 00 00 mov 0x98(%rdx),%rax + % d7: 48 c7 c2 d7 00 00 00 mov $0xd7,%rdx + % de: 48 c7 c1 8b 01 00 00 mov $0x18b,%rcx + % e5: 4d 89 d8 mov %r11,%r8 + % e8: ff e0 jmpq *%rax + % ea: 49 83 e3 fc and $0xfffffffffffffffc,%r11 + % ee: 4d 8b 1b mov (%r11),%r11 + % f1: 4d 89 da mov %r11,%r10 + % f4: 41 80 e2 3f and $0x3f,%r10b + % f8: 41 80 fa 14 cmp $0x14,%r10b + % fc: 74 1a je 0x118 + % fe: 48 8b 82 98 00 00 00 mov 0x98(%rdx),%rax + % 105: 48 c7 c2 05 01 00 00 mov $0x105,%rdx + % 10c: 48 c7 c1 8b 01 00 00 mov $0x18b,%rcx + % 113: 4d 89 d8 mov %r11,%r8 + % 116: ff e0 jmpq *%rax + % 118: 4c 8b 1e mov (%rsi),%r11 + % 11b: 45 8b 1b mov (%r11),%r11d + % 11e: 49 c1 e3 18 shl $0x18,%r11 + % ... + + ?assertMatch( + {_, 20}, + binary:match( + CompiledCode, + <<16#48, 16#8b, 16#42, 16#10, 16#ff, 16#e0, 16#48, 16#8b, 16#47, 16#38, 16#4c, 16#8b, + 16#1e, 16#45, 16#8b, 16#1b, 16#49, 16#c1, 16#e3, 16#18>> + ) + ), + ok. 
From ab25ecd7cac978b12b13ae13104d68ec9614dc1b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 3 Sep 2025 22:54:28 +0200 Subject: [PATCH 05/46] JIT: reduce register usage on binary matching Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 4b77c284a..ffda3357d 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -2907,10 +2907,10 @@ do_get_tail( MSt1 = cond_raise_badarg({BSOffsetReg, '&', 2#111, '!=', 0}, MMod, MSt0), {MSt2, BSOffseBytesReg} = MMod:copy_to_native_register(MSt1, BSOffsetReg), MSt3 = MMod:shift_right(MSt2, BSOffseBytesReg, 3), - {MSt4, TailBytesReg} = MMod:get_array_element(MSt3, BSBinaryReg, 1), - MSt5 = MMod:sub(MSt4, TailBytesReg, BSOffseBytesReg), + {MSt4, TailBytesReg0} = MMod:get_array_element(MSt3, BSBinaryReg, 1), + MSt5 = MMod:sub(MSt4, TailBytesReg0, BSOffseBytesReg), {MSt6, HeapSizeReg} = MMod:call_primitive(MSt5, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ - BSBinaryReg, TailBytesReg + BSBinaryReg, {free, TailBytesReg0} ]), {MSt7, NewMatchState} = memory_ensure_free_with_extra_root( MatchState, Live, {free, HeapSizeReg}, MMod, MSt6 @@ -2920,13 +2920,17 @@ do_get_tail( MSt9 = MMod:and_(MSt8, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), MSt10 = MMod:move_array_element(MSt9, MatchStateReg0, 1, BSBinaryReg), MSt11 = MMod:free_native_registers(MSt10, [MatchStateReg0]), - {MSt12, ResultTerm} = MMod:call_primitive(MSt11, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ - ctx, BSBinaryReg, {free, BSOffseBytesReg}, TailBytesReg - ]), - MSt13 = MMod:shift_left(MSt12, TailBytesReg, 3), - MSt14 = MMod:add(MSt13, BSOffsetReg, TailBytesReg), - MSt15 = MMod:free_native_registers(MSt14, [TailBytesReg]), - {MSt15, ResultTerm, NewMatchState}. 
+ MSt12 = MMod:and_(MSt11, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt13, TailBytesReg1} = MMod:get_array_element(MSt12, BSBinaryReg, 1), + MSt14 = MMod:sub(MSt13, TailBytesReg0, BSOffseBytesReg), + MSt15 = MMod:add(MSt14, BSBinaryReg, ?TERM_PRIMARY_BOXED), + {MSt16, ResultTerm} = MMod:call_primitive(MSt15, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ + ctx, BSBinaryReg, {free, BSOffseBytesReg}, TailBytesReg1 + ]), + MSt17 = MMod:shift_left(MSt16, TailBytesReg1, 3), + MSt18 = MMod:add(MSt17, BSOffsetReg, TailBytesReg1), + MSt19 = MMod:free_native_registers(MSt18, [TailBytesReg1]), + {MSt19, ResultTerm, NewMatchState}. first_pass_bs_match_equal_colon_equal( Fail, MatchState, BSBinaryReg, BSOffsetReg, J0, Rest0, MMod, MSt0 From 96c584261b63aa492960c0e55e16c560b8078afc Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 20 Jul 2025 19:02:58 +0200 Subject: [PATCH 06/46] AArch64: initial commit Signed-off-by: Paul Guyot --- libs/jit/include/jit.hrl | 1 + libs/jit/src/CMakeLists.txt | 2 + libs/jit/src/jit_aarch64.erl | 2112 ++++++++++++++++++++++ libs/jit/src/jit_aarch64_asm.erl | 822 +++++++++ src/libAtomVM/jit.h | 1 + tests/libs/jit/CMakeLists.txt | 2 + tests/libs/jit/jit_aarch64_asm_tests.erl | 300 +++ tests/libs/jit/jit_aarch64_tests.erl | 367 ++++ tests/libs/jit/tests.erl | 1 + 9 files changed, 3608 insertions(+) create mode 100644 libs/jit/src/jit_aarch64.erl create mode 100644 libs/jit/src/jit_aarch64_asm.erl create mode 100644 tests/libs/jit/jit_aarch64_asm_tests.erl create mode 100644 tests/libs/jit/jit_aarch64_tests.erl diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 8fe0d6bde..427fa40ae 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -21,6 +21,7 @@ -define(JIT_FORMAT_VERSION, 1). -define(JIT_ARCH_X86_64, 1). +-define(JIT_ARCH_AARCH64, 2). -define(JIT_VARIANT_PIC, 1). 
diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index bc97ea690..a5810feff 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -27,6 +27,8 @@ set(ERLANG_MODULES jit_precompile jit_stream_binary jit_stream_mmap + jit_aarch64 + jit_aarch64_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl new file mode 100644 index 000000000..fe3e57a94 --- /dev/null +++ b/libs/jit/src/jit_aarch64.erl @@ -0,0 +1,2112 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64). 
+ +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/2, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/3 +]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +-define(ASSERT(Expr), true = Expr). + +%% AArch64 ABI: r0-r7 are used for argument passing and return value. +%% r8 is the indirect result location register (platform-specific), +%% r9-r15 are caller-saved scratch registers (used by JIT), +%% r16-r17 are intra-procedure-call scratch registers, +%% r18 is platform register (reserved), +%% r19-r28 are callee-saved, +%% r29 is frame pointer, r30 is link register, r31 is stack pointer/zero. +%% d0-d7 are used for FP argument passing and return value. +%% d8-d15 are callee-saved FP registers. +%% +%% See: Arm® Architecture Procedure Call Standard (AAPCS64) +%% https://developer.arm.com/documentation/ihi0055/latest/ +%% +%% Registers used by the JIT backend: +%% - Scratch GPRs: r9-r15 +%% - Argument/return: r0-r7, d0-d7 +%% - Stack pointer: r31 (sp) +%% - Frame pointer: r29 +%% - Link register: r30 +%% - Indirect result: r8 +%% +%% Note: r18 is reserved for platform use and must not be used. 
+%% +%% For more details, refer to the AArch64 Procedure Call Standard. + +-type aarch64_register() :: + r0 + | r1 + | r2 + | r3 + | r4 + | r5 + | r6 + | r7 + | r8 + | r9 + | r10 + | r11 + | r12 + | r13 + | r14 + | r15 + | d0 + | d1 + | d2 + | d3 + | d4 + | d5 + | d6 + | d7. + +-define(IS_GPR(Reg), + (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse + Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7 orelse Reg =:= r8 orelse Reg =:= r9 orelse + Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse + Reg =:= r15) +). +-define(IS_FPR(Reg), + (Reg =:= d0 orelse Reg =:= d1 orelse Reg =:= d2 orelse Reg =:= d3 orelse Reg =:= d4 orelse + Reg =:= d5 orelse Reg =:= d6 orelse Reg =:= d7) +). + +-type stream() :: any(). + +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + available_regs :: [aarch64_register()], + available_fpregs :: [aarch64_register()], + used_regs :: [aarch64_register()] +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). +-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, aarch64_register()}. +-type value() :: immediate() | vm_register() | aarch64_register() | {ptr, aarch64_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()}. + +-type maybe_free_aarch64_register() :: + {free, aarch64_register()} | aarch64_register(). + +-type condition() :: + {aarch64_register(), '<', 0} + | {maybe_free_aarch64_register(), '==', 0} + | {maybe_free_aarch64_register(), '!=', integer()} + | {'(uint8_t)', maybe_free_aarch64_register(), '==', false} + | {'(uint8_t)', maybe_free_aarch64_register(), '!=', false} + | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', 0}. + +% ctx->e is 0x28 +% ctx->x is 0x30 +-define(CTX_REG, r0). +-define(JITSTATE_REG, r1). 
+-define(NATIVE_INTERFACE_REG, r2). +-define(Y_REGS, {16#28, ?CTX_REG}). +-define(X_REG(N), {16#30 + (N * 8), ?CTX_REG}). +-define(CP, {16#B8, ?CTX_REG}). +-define(FP_REGS, {16#C0, ?CTX_REG}). +-define(BS, {16#C8, ?CTX_REG}). +-define(BS_OFFSET, {16#D0, ?CTX_REG}). +-define(JITSTATE_MODULE, {0, ?JITSTATE_REG}). +-define(JITSTATE_CONTINUATION, {16#8, ?JITSTATE_REG}). +-define(JITSTATE_REDUCTIONCOUNT, {16#10, ?JITSTATE_REG}). +-define(PRIMITIVE(N), {N * 8, ?NATIVE_INTERFACE_REG}). +-define(MODULE_INDEX(ModuleReg), {0, ModuleReg}). + +% aarch64 ABI specific +-define(LR_REG, r30). +-define(IP0_REG, r16). + +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). + +-define(AVAILABLE_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]). +-define(AVAILABLE_FPREGS, [d0, d1, d2, d3, d4, d5, d6, d7]). +-define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). +-define(PARAMETER_FPREGS, [d0, d1, d2, d3, d4, d5]). + +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% @end +%% @return Word size in bytes +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> 8. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream. 
+%% @end
+%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
+%% @param StreamModule module to stream instructions
+%% @param Stream stream state
+%% @return New backend state
+%%-----------------------------------------------------------------------------
+-spec new(any(), module(), stream()) -> state().
+new(_Variant, StreamModule, Stream) ->
+    #state{
+        stream_module = StreamModule,
+        stream = Stream,
+        branches = [],
+        offset = StreamModule:offset(Stream),
+        available_regs = ?AVAILABLE_REGS,
+        available_fpregs = ?AVAILABLE_FPREGS,
+        used_regs = []
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Access the stream object.
+%% @end
+%% @param State current backend state
+%% @return The stream object
+%%-----------------------------------------------------------------------------
+-spec stream(state()) -> stream().
+stream(#state{stream = Stream}) ->
+    Stream.
+
+%%-----------------------------------------------------------------------------
+%% @doc Get the current offset in the stream
+%% @end
+%% @param State current backend state
+%% @return The current offset
+%%-----------------------------------------------------------------------------
+-spec offset(state()) -> non_neg_integer().
+offset(#state{stream_module = StreamModule, stream = Stream}) ->
+    StreamModule:offset(Stream).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a debugger or breakpoint instruction. This is used for debugging
+%% and not in production.
+%% @end
+%% @param State current backend state
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec debugger(state()) -> state().
+debugger(#state{stream_module = StreamModule, stream = Stream0} = State) ->
+    Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:brk(0)),
+    State#state{stream = Stream1}.
+ +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently used native registers. This is used for +%% debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of used registers +%%----------------------------------------------------------------------------- +-spec used_regs(state()) -> [aarch64_register()]. +used_regs(#state{used_regs = Used}) -> Used. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently available native scratch registers. This +%% is used for debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of available registers +%%----------------------------------------------------------------------------- +-spec available_regs(state()) -> [aarch64_register()]. +available_regs(#state{available_regs = Available}) -> Available. + +%%----------------------------------------------------------------------------- +%% @doc Free native registers. The passed list of registers can contain +%% registers, pointer to registers or other values that are ignored. +%% @end +%% @param State current backend state +%% @param Regs list of registers or other values +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec free_native_registers(state(), [value()]) -> state(). +free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +-spec free_native_register(state(), value()) -> state(). 
+free_native_register( + #state{available_regs = Available0, available_fpregs = AvailableFP0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, AvailableFP1, Used1} = free_reg(Available0, AvailableFP0, Used0, Reg), + State#state{available_regs = Available1, available_fpregs = AvailableFP1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, available_fpregs = ?AVAILABLE_FPREGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/2. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). +jump_table(State, LabelsCount) -> + jump_table0(State, 0, LabelsCount). 
+ +jump_table0(State, N, LabelsCount) when N > LabelsCount -> + State; +jump_table0( + #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + N, + LabelsCount +) -> + Offset = StreamModule:offset(Stream0), + BranchInstr = jit_aarch64_asm:b(0), + Reloc = {N, Offset, 32}, + Stream1 = StreamModule:append(Stream0, BranchInstr), + jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @param Labels list of tuples with label, offset and size of the branch in bits +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state(), [{non_neg_integer(), non_neg_integer()}]) -> state(). +update_branches(#state{branches = []} = State, _Labels) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Size} | BranchesT] + } = State, + Labels +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Rel = ((LabelOffset - Offset) div 4), + Patched = <<(16#14000000 bor (Rel band 16#03FFFFFF)):32>>, + Stream1 = StreamModule:map(Stream0, Offset, Size div 8, fun(_) -> Patched end), + update_branches(State#state{stream = Stream1, branches = BranchesT}, Labels). + +%%----------------------------------------------------------------------------- +%% @doc Emit a call (call with return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. It also saves scratch registers we need to preserve. 
+%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), aarch64_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + Primitive, + Args +) -> + PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(?IP0_REG, {0, ?NATIVE_INTERFACE_REG}); + N -> + jit_aarch64_asm:ldr(?IP0_REG, {N * 8, ?NATIVE_INTERFACE_REG}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{stream = Stream1}, + call_func_ptr(StateCall, {free, ?IP0_REG}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. 
+%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(Temp, {0, ?NATIVE_INTERFACE_REG}); + N -> + jit_aarch64_asm:ldr(Temp, {N * 8, ?NATIVE_INTERFACE_REG}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + State1 = set_args( + State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + Args + ), + #state{stream = Stream2} = State1, + Call = jit_aarch64_asm:br(Temp), + Stream3 = StreamModule:append(Stream2, Call), + State1#state{stream = Stream3, available_regs = ?AVAILABLE_REGS, used_regs = []}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a return of a value if it's not equal to ctx. +%% This logic is used to break out to the scheduler, typically after signal +%% messages have been processed. 
+%% @end +%% @param State current backend state +%% @param Reg register to compare to (should be {free, Reg} as it's always freed) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +return_if_not_equal_to_ctx( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + available_fpregs = AvailableFPRegs0, + used_regs = UsedRegs0 + } = State, + {free, Reg} +) -> + I1 = jit_aarch64_asm:cmp(Reg, ?CTX_REG), + I3 = + case Reg of + % Return value is already in r0 + r0 -> <<>>; + % Move to r0 (return register) + _ -> jit_aarch64_asm:orr(r0, xzr, Reg) + end, + I4 = jit_aarch64_asm:ret(), + I2 = jit_aarch64_asm:bcc(eq, byte_size(I3) + byte_size(I4)), + Stream1 = StreamModule:append(Stream0, <>), + {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( + AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg + ), + State#state{ + stream = Stream1, + available_regs = AvailableRegs1, + available_fpregs = AvailableFPRegs1, + used_regs = UsedRegs1 + }. + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump to a label. The offset of the relocation is saved and will +%% be updated with `update_branches/2`. +%% @end +%% @param State current backend state +%% @param Label to jump to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_label( + #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches} = State, Label +) -> + Offset = StreamModule:offset(Stream0), + %% Use unconditional branch instruction B + + % Placeholder offset, will be patched + I1 = jit_aarch64_asm:b(-4), + % Offset is at the beginning of the instruction + RelocOffset = 0, + % AArch64 B instruction uses 26-bit offset + Reloc = {Label, Offset + RelocOffset, 26}, + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1, branches = [Reloc | AccBranches]}. 
+ +%%----------------------------------------------------------------------------- +%% @doc Emit an if block, i.e. emit a test of a condition and conditionnally +%% execute a block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockFn function to emit the block that may be executed +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state(). +if_block( + #state{stream_module = StreamModule} = State0, + {'and', CondList}, + BlockFn +) -> + {Replacements, State1} = lists:foldl( + fun(Cond, {AccReplacements, AccState}) -> + Offset = StreamModule:offset(AccState#state.stream), + {NewAccState, ReplaceDelta} = if_block_cond(AccState, Cond), + {[Offset + ReplaceDelta | AccReplacements], NewAccState} + end, + {[], State0}, + CondList + ), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + Stream3 = lists:foldl( + fun(ReplacementOffset, AccStream) -> + BranchOffset = OffsetAfter - ReplacementOffset, + StreamModule:map(Stream2, ReplacementOffset, 4, fun(PrevValue) -> + jit_aarch64_asm:patch_bcc_offset(PrevValue, BranchOffset) + end) + end, + Stream2, + Replacements + ), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs); +if_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, BranchInstrOffset} = if_block_cond(State0, Cond), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + %% Patch the conditional branch instruction to jump to the end of the block + BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + Stream3 = StreamModule:map(Stream2, Offset + BranchInstrOffset, 4, fun(PrevValue) -> + jit_aarch64_asm:patch_bcc_offset(PrevValue, 
BranchOffset) + end), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if else block, i.e. emit a test of a condition and +%% conditionnally execute a block or another block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockTrueFn function to emit the block that is executed if condition is true +%% @param BlockFalseFn function to emit the block that is executed if condition is false +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) -> + state(). +if_else_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockTrueFn, + BlockFalseFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, BranchInstrOffset} = if_block_cond(State0, Cond), + OffsetAfterCond = StreamModule:offset(State1#state.stream), + State2 = BlockTrueFn(State1), + Stream2 = State2#state.stream, + ElseJumpOffset = StreamModule:offset(Stream2), + %% Emit unconditional branch to skip the else block + + % Placeholder offset, will be patched + I = jit_aarch64_asm:b(0), + Stream3 = StreamModule:append(Stream2, I), + OffsetAfterJump = StreamModule:offset(Stream3), + %% Patch the conditional branch to jump to the else block + ElseBranchOffset = OffsetAfterJump - (Offset + BranchInstrOffset), + Stream4 = StreamModule:map(Stream3, Offset + BranchInstrOffset, 4, fun(PrevValue) -> + jit_aarch64_asm:patch_bcc_offset(PrevValue, ElseBranchOffset) + end), + StateElse = State2#state{ + stream = Stream4, + used_regs = State1#state.used_regs, + available_regs = State1#state.available_regs, + available_fpregs = State1#state.available_fpregs + }, + State3 = BlockFalseFn(StateElse), + Stream5 = State3#state.stream, + OffsetFinal = 
StreamModule:offset(Stream5), + %% Patch the unconditional branch to jump to the end + FinalJumpOffset = OffsetFinal - OffsetAfterJump, + Stream6 = StreamModule:map(Stream5, ElseJumpOffset, 4, fun(PrevValue) -> + jit_aarch64_asm:patch_b_offset(PrevValue, FinalJumpOffset) + end), + merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). + +-spec if_block_cond(state(), condition()) -> {state(), non_neg_integer()}. +if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> + I1 = jit_aarch64_asm:tst(Reg, Reg), + % pl = positive or zero (>=0) + I2 = jit_aarch64_asm:bcc(pl, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}, + {State1, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegA, '<', RegB} +) when ?IS_GPR(RegA) -> + I1 = jit_aarch64_asm:cmp(RegA, RegB), + % ge = greater than or equal + I2 = jit_aarch64_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}, + {State1, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:tst(Reg, Reg), + % ne = not equal + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:tst32(Reg, Reg), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, 
+ I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '!=', Val} +) when is_integer(Val) orelse ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = + case Val of + V when is_integer(V) -> jit_aarch64_asm:cmp(Reg, V); + V when is_atom(V) -> jit_aarch64_asm:cmp(Reg, V) + end, + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '!=', Val} +) when is_integer(Val) orelse ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = + case Val of + V when is_integer(V) -> jit_aarch64_asm:cmp32(Reg, V); + V when is_atom(V) -> jit_aarch64_asm:cmp32(Reg, V) + end, + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '==', Val} +) when is_integer(Val) orelse ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = + case Val of + V when is_integer(V) -> jit_aarch64_asm:cmp(Reg, V); + V when is_atom(V) -> jit_aarch64_asm:cmp(Reg, V) + end, + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + 
State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '==', Val} +) when is_integer(Val) orelse ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = + case Val of + V when is_integer(V) -> jit_aarch64_asm:cmp32(Reg, V); + V when is_atom(V) -> jit_aarch64_asm:cmp32(Reg, V) + end, + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(uint8_t)', RegOrTuple, '==', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test low 8 bits + I1 = jit_aarch64_asm:tst32(Reg, 16#FF), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(uint8_t)', RegOrTuple, '!=', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test low 8 bits + I1 = jit_aarch64_asm:tst32(Reg, 16#FF), + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {Reg, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % Move Reg to Temp + I1 = jit_aarch64_asm:orr(Temp, xzr, Reg), + % 
AND with mask + I2 = jit_aarch64_asm:and_reg(Temp, Temp, Mask), + % Compare with value + I3 = jit_aarch64_asm:cmp(Temp, Val), + I4 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary, + I3/binary, + I4/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}, + {State1, byte_size(I1) + byte_size(I2) + byte_size(I3)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {{free, Reg} = RegTuple, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % AND with mask + I1 = jit_aarch64_asm:and_reg(Reg, Reg, Mask), + % Compare with value + I2 = jit_aarch64_asm:cmp(Reg, Val), + I3 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary, + I3/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1) + byte_size(I2)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {RegOrTuple, '&', Val} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test bits + I1 = jit_aarch64_asm:tst(Reg, Val), + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {'(uint8_t)', RegOrTuple, '&', Val} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test 8-bit value + I1 = jit_aarch64_asm:tst32(Reg, Val), + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, byte_size(I1)}. 
%% @doc Release a native register back to the allocator when it was passed as
%% {free, Reg}; a bare register operand is left untouched.
-spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state().
if_block_free_reg({free, Reg}, State) ->
    #state{available_regs = GP0, available_fpregs = FP0, used_regs = Used0} = State,
    {GP1, FP1, Used1} = free_reg(GP0, FP0, Used0, Reg),
    State#state{
        available_regs = GP1,
        available_fpregs = FP1,
        used_regs = Used1
    };
if_block_free_reg(Reg, State) when ?IS_GPR(Reg) ->
    State.

%% @doc Mark every register in the list as used, removing it from both the
%% general-purpose and floating-point free pools. Already-used registers are
%% left alone.
-spec merge_used_regs(state(), [aarch64_register()]) -> state().
merge_used_regs(State, Regs) ->
    lists:foldl(
        fun(Reg, #state{used_regs = Used, available_regs = GP, available_fpregs = FP} = Acc) ->
            case lists:member(Reg, Used) of
                true ->
                    Acc;
                false ->
                    Acc#state{
                        used_regs = [Reg | Used],
                        available_regs = lists:delete(Reg, GP),
                        available_fpregs = lists:delete(Reg, FP)
                    }
            end
        end,
        State,
        Regs
    ).

%%-----------------------------------------------------------------------------
%% @doc Emit a logical shift right of a register by a fixed number of bits,
%% effectively dividing it by 2^Shift. The shift is performed in place.
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state
%%-----------------------------------------------------------------------------
shift_right(#state{stream_module = StreamModule, stream = Stream} = State, Reg, Shift) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    Instr = jit_aarch64_asm:lsr(Reg, Reg, Shift),
    State#state{stream = StreamModule:append(Stream, Instr)}.
%%-----------------------------------------------------------------------------
%% @doc Emit a logical shift left of a register by a fixed number of bits,
%% effectively multiplying it by 2^Shift. The shift is performed in place.
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state
%%-----------------------------------------------------------------------------
shift_left(#state{stream_module = StreamModule, stream = Stream} = State, Reg, Shift) when
    is_atom(Reg)
->
    Instr = jit_aarch64_asm:lsl(Reg, Reg, Shift),
    State#state{stream = StreamModule:append(Stream, Instr)}.

%%-----------------------------------------------------------------------------
%% @doc Emit a call to a function pointer with arguments. This function converts
%% arguments and passes them following the backend ABI convention.
%% @end
%% @param State current backend state
%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
%% @param Args arguments to pass to the function
%% @return Updated backend state and return register
%%-----------------------------------------------------------------------------
-spec call_func_ptr(state(), {free, aarch64_register()} | {primitive, non_neg_integer()}, [arg()]) ->
    {state(), aarch64_register()}.
call_func_ptr(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        available_fpregs = AvailableFP0,
        used_regs = UsedRegs0
    } = State0,
    FuncPtrTuple,
    Args
) ->
    %% Registers tagged {free, _} are consumed by this call and need not be
    %% preserved across it (IP0 is never preserved anyway).
    FreeRegs = lists:flatmap(
        fun
            ({free, ?IP0_REG}) -> [];
            ({free, {ptr, Reg}}) -> [Reg];
            ({free, Reg}) when is_atom(Reg) -> [Reg];
            (_) -> []
        end,
        [FuncPtrTuple | Args]
    ),
    UsedRegs1 = UsedRegs0 -- FreeRegs,
    SavedRegs = [?LR_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],
    {SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0),

    %% Load arguments into parameter registers following the AArch64 ABI.
    State1 = set_args(State0#state{stream = Stream1}, Args),
    #state{stream = Stream2} = State1,

    {FuncPtrReg, Stream3} =
        case FuncPtrTuple of
            {free, Reg} ->
                {Reg, Stream2};
            {primitive, Primitive} ->
                %% Load the primitive's address from the native interface table
                %% into IP0 (x16), the intra-procedure-call scratch register.
                PrepCall = jit_aarch64_asm:ldr(?IP0_REG, {Primitive * 8, ?NATIVE_INTERFACE_REG}),
                {?IP0_REG, StreamModule:append(Stream2, PrepCall)}
        end,

    %% Call through the pointer (BLR: branch with link to register).
    Stream4 = StreamModule:append(Stream3, jit_aarch64_asm:blr(FuncPtrReg)),

    %% The result is in r0; if r0 is about to be restored from the stack,
    %% stash the result in a scratch register first.
    AvailableRegs1 = FreeRegs ++ AvailableRegs0,
    {Stream5, ResultReg} =
        case lists:member(r0, SavedRegs) of
            true ->
                [Temp | _] = AvailableRegs1,
                {StreamModule:append(Stream4, jit_aarch64_asm:mov(Temp, r0)), Temp};
            false ->
                {Stream4, r0}
        end,

    Stream6 = pop_registers(SavedRegsOdd, lists:reverse(SavedRegs), StreamModule, Stream5),

    %% Normalize the free pools: drop the result register and restore the
    %% canonical ?AVAILABLE_REGS / ?AVAILABLE_FPREGS ordering.
    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
    AvailableFP1 = FreeRegs ++ AvailableFP0,
    AvailableFP2 = lists:delete(ResultReg, AvailableFP1),
    AvailableFP3 = ?AVAILABLE_FPREGS -- (?AVAILABLE_FPREGS -- AvailableFP2),
    {
        State1#state{
            stream = Stream6,
            available_regs = AvailableRegs3,
            available_fpregs = AvailableFP3,
            used_regs = [ResultReg | UsedRegs1]
        },
        ResultReg
    }.

%% @doc Push registers pairwise (stp, pre-indexed, 16-byte aligned); a trailing
%% odd register gets its own 16-byte slot. Returns {OddCount, Stream}.
push_registers([RegA, RegB | Tail], StreamModule, Stream0) ->
    Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:stp_x(RegA, RegB, {sp, -16}, '!')),
    push_registers(Tail, StreamModule, Stream1);
push_registers([Last], StreamModule, Stream0) ->
    Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:str_x(Last, {sp, -16}, '!')),
    {true, Stream1};
push_registers([], _StreamModule, Stream0) ->
    {false, Stream0}.

%% @doc Pop registers pushed by push_registers/3; takes the reversed saved
%% list, popping the odd straggler first when there was one.
pop_registers(true, [Reg | Tail], StreamModule, Stream0) ->
    Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldr_x(Reg, {sp}, 16)),
    pop_registers(false, Tail, StreamModule, Stream1);
pop_registers(false, [RegB, RegA | Tail], StreamModule, Stream0) ->
    Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldp_x(RegA, RegB, {sp}, 16)),
    pop_registers(false, Tail, StreamModule, Stream1);
pop_registers(false, [], _StreamModule, Stream0) ->
    Stream0.

-spec set_args(state(), [arg()]) -> state().
set_args(
    #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, Args
) ->
    ParamRegs = parameter_regs(Args),
    ArgsRegs = args_regs(Args),
    %% NOTE(review): rdi..r11 are x86-64 register names; in this AArch64
    %% backend this scratch list looks like a leftover from the x86-64 port —
    %% TODO confirm against ?AVAILABLE_REGS.
    AvailableScratchGP =
        [rdi, rsi, rdx, rcx, r8, r9, r10, r11] -- ParamRegs -- ArgsRegs -- UsedRegs,
    AvailableScratchFP = ?AVAILABLE_FPREGS -- ParamRegs -- ArgsRegs -- UsedRegs,
    Offset = StreamModule:offset(Stream0),
    %% Resolve the special 'offset' pseudo-argument to the current stream
    %% offset before emitting the moves.
    ResolvedArgs = [
        case Arg of
            offset -> Offset;
            _ -> Arg
        end
     || Arg <- Args
    ],
    SetArgsCode = set_args0(
        ResolvedArgs, ArgsRegs, ParamRegs, AvailableScratchGP, AvailableScratchFP, []
    ),
    Stream1 = StreamModule:append(Stream0, SetArgsCode),
    %% Registers passed as {free, _} no longer count as used after the moves.
    NewUsedRegs = lists:foldl(
        fun
            ({free, {ptr, Reg}}, Acc) -> lists:delete(Reg, Acc);
            ({free, Reg}, Acc) -> lists:delete(Reg, Acc);
            (_, Acc) -> Acc
        end,
        UsedRegs,
        Args
    ),
    State0#state{
        stream = Stream1,
        available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs,
        available_fpregs = ?AVAILABLE_FPREGS -- ParamRegs -- NewUsedRegs,
        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
    }.

%% @doc Compute the list of parameter registers the arguments will occupy.
parameter_regs(Args) ->
    parameter_regs0(Args, ?PARAMETER_REGS, ?PARAMETER_FPREGS, []).
%% @doc Walk the argument list, consuming one GP parameter register per
%% integer/GP/VM-register argument and one FP parameter register per
%% float-typed argument, in declaration order.
parameter_regs0([], _, _, Acc) ->
    lists:reverse(Acc);
parameter_regs0([Special | T], [GPReg | GPRegsT], FPRegs, Acc) when
    Special =:= ctx orelse Special =:= jit_state orelse Special =:= offset
->
    parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]);
parameter_regs0([{free, Inner} | T], GPRegs, FPRegs, Acc) ->
    %% {free, _} is a liveness tag; classify the wrapped value.
    parameter_regs0([Inner | T], GPRegs, FPRegs, Acc);
parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) ->
    parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]);
parameter_regs0([Reg | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) ->
    parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]);
parameter_regs0([Reg | T], GPRegs, [FPReg | FPRegsT], Acc) when ?IS_FPR(Reg) ->
    parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]);
parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) ->
    parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]);
parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) ->
    parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]);
parameter_regs0([{fp_reg, _} | T], GPRegs, [FPReg | FPRegsT], Acc) ->
    parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]);
parameter_regs0([Int | T], [GPReg | GPRegsT], FPRegs, Acc) when is_integer(Int) ->
    parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]).

%% @doc Replace the first occurrence of Reg (possibly wrapped as {free, Reg})
%% in the argument list with Reg2, leaving the rest untouched.
replace_reg(Args, Reg1, Reg2) ->
    replace_reg0(Args, Reg1, Reg2, []).

replace_reg0([Reg | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([Other | T], Reg, Replacement, Acc) ->
    replace_reg0(T, Reg, Replacement, [Other | Acc]).
%% @doc Emit the moves that load every argument into its parameter register.
%% ArgsRegs tracks which register currently holds each pending argument: when
%% a target parameter register still holds a later argument, that argument is
%% first relocated to a scratch register. Clause order is significant.
set_args0([], [], [], _AvailGP, _AvailFP, Acc) ->
    list_to_binary(lists:reverse(Acc));
set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc) ->
    %% Liveness tags are irrelevant for code emission; unwrap and retry.
    set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc);
set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) ->
    %% ctx already sits in its parameter register: nothing to emit.
    set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc);
set_args0(
    [jit_state | ArgsT],
    [?JITSTATE_REG | ArgsRegs],
    [?JITSTATE_REG | ParamRegs],
    AvailGP,
    AvailFP,
    Acc
) ->
    %% jit_state already sits in its parameter register: nothing to emit.
    set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc);
set_args0(
    [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, AvailFP, Acc
) ->
    false = lists:member(ParamReg, ArgsRegs),
    set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [
        jit_aarch64_asm:mov(ParamReg, ?JITSTATE_REG) | Acc
    ]);
%% ctx is special as we need it to access x_reg/y_reg/fp_reg
set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) ->
    false = lists:member(?CTX_REG, ArgsRegs),
    J = set_args1(Arg, ?CTX_REG),
    set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]);
set_args0(
    [Arg | ArgsT],
    [_ArgReg | ArgsRegs],
    [ParamReg | ParamRegs],
    [Avail | AvailGPT] = AvailGP,
    AvailFP,
    Acc
) ->
    J = set_args1(Arg, ParamReg),
    case lists:member(ParamReg, ArgsRegs) of
        false ->
            set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]);
        true ->
            %% ParamReg still holds a pending argument: relocate that argument
            %% to a scratch register before clobbering ParamReg.
            I = jit_aarch64_asm:mov(Avail, ParamReg),
            NewArgsT = replace_reg(ArgsT, ParamReg, Avail),
            set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, AvailFP, [J, I | Acc])
    end.
%% @doc Emit the instruction(s) loading one argument into parameter register
%% Reg. Returns an instruction binary or iolist (empty when no move needed).
set_args1(Reg, Reg) ->
    %% Argument already lives in the target register.
    [];
set_args1({x_reg, extra}, Reg) ->
    jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG));
set_args1({x_reg, X}, Reg) ->
    jit_aarch64_asm:ldr(Reg, ?X_REG(X));
set_args1({ptr, Source}, Reg) ->
    jit_aarch64_asm:ldr(Reg, {0, Source});
set_args1({y_reg, X}, Reg) ->
    %% Fixed: loading from memory requires ldr, not mov (mov cannot take a
    %% {Offset, Reg} memory operand); matches every other y_reg access in
    %% this module.
    [
        jit_aarch64_asm:ldr(Reg, ?Y_REGS),
        jit_aarch64_asm:ldr(Reg, {X * 8, Reg})
    ];
set_args1(ArgReg, Reg) when ?IS_GPR(ArgReg) ->
    jit_aarch64_asm:mov(Reg, ArgReg);
set_args1(Arg, Reg) when is_integer(Arg) ->
    %% mov handles any 64-bit immediate; large values may expand to a
    %% movz/movk sequence inside the assembler.
    jit_aarch64_asm:mov(Reg, Arg).

%% @doc Move a value (immediate, native register or VM register) into a VM
%% register ({x_reg, _} / {y_reg, _} / {fp_reg, _}) or through {ptr, Reg}.
%% Zero immediates use the xzr zero register to avoid a scratch mov.
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, 0, {x_reg, X}
) when X < ?MAX_REG ->
    I1 = jit_aarch64_asm:str(xzr, ?X_REG(X)),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, 0, {x_reg, extra}
) ->
    I1 = jit_aarch64_asm:str(xzr, ?X_REG(?MAX_REG)),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_vm_register(#state{stream_module = StreamModule, stream = Stream0} = State, 0, {ptr, Reg}) ->
    I1 = jit_aarch64_asm:str(xzr, {0, Reg}),
    State#state{stream = StreamModule:append(Stream0, I1)};
%% Merged: the original had separate ?IS_SINT32_T(N) and is_integer(N) clauses
%% with identical bodies; a single is_integer clause covers both.
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    N,
    {x_reg, X}
) when X < ?MAX_REG andalso is_integer(N) ->
    I1 = jit_aarch64_asm:mov(Temp, N),
    I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    N,
    {ptr, Reg}
) when is_integer(N) ->
    I1 = jit_aarch64_asm:mov(Temp, N),
    I2 = jit_aarch64_asm:str(Temp, {0, Reg}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    0,
    {y_reg, Y}
) ->
    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
    I2 = jit_aarch64_asm:str(xzr, {Y * 8, Temp}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} =
        State,
    N,
    {y_reg, Y}
) when is_integer(N) ->
    I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS),
    I2 = jit_aarch64_asm:mov(Temp2, N),
    I3 = jit_aarch64_asm:str(Temp2, {Y * 8, Temp1}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {x_reg, X},
    {x_reg, Y}
) when X < ?MAX_REG andalso Y < ?MAX_REG ->
    I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)),
    I2 = jit_aarch64_asm:str(Temp, ?X_REG(Y)),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {x_reg, X},
    {ptr, Reg}
) when X < ?MAX_REG ->
    I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)),
    I2 = jit_aarch64_asm:str(Temp, {0, Reg}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} =
        State,
    {x_reg, X},
    {y_reg, Y}
) when X < ?MAX_REG ->
    I1 = jit_aarch64_asm:ldr(Temp1, ?X_REG(X)),
    I2 = jit_aarch64_asm:ldr(Temp2, ?Y_REGS),
    I3 = jit_aarch64_asm:str(Temp1, {Y * 8, Temp2}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {y_reg, Y},
    {x_reg, X}
) when X < ?MAX_REG ->
    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
    I2 = jit_aarch64_asm:ldr(Temp, {Y * 8, Temp}),
    I3 = jit_aarch64_asm:str(Temp, ?X_REG(X)),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {y_reg, Y},
    {ptr, Reg}
) ->
    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
    I2 = jit_aarch64_asm:ldr(Temp, {Y * 8, Temp}),
    I3 = jit_aarch64_asm:str(Temp, {0, Reg}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} =
        State,
    {y_reg, YS},
    {y_reg, YD}
) ->
    I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS),
    I2 = jit_aarch64_asm:ldr(Temp2, {YS * 8, Temp1}),
    I3 = jit_aarch64_asm:str(Temp2, {YD * 8, Temp1}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, {x_reg, X}
) when is_atom(Reg) andalso X < ?MAX_REG ->
    I1 = jit_aarch64_asm:str(Reg, ?X_REG(X)),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_vm_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, {ptr, Dest}
) when is_atom(Reg) ->
    I1 = jit_aarch64_asm:str(Reg, {0, Dest}),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_vm_register(
    #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State,
    Reg,
    {y_reg, Y}
) when is_atom(Reg) ->
    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
    I2 = jit_aarch64_asm:str(Reg, {Y * 8, Temp}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _], stream = Stream0} =
        State,
    {ptr, Reg},
    {y_reg, Y}
) when ?IS_GPR(Reg) ->
    I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS),
    I2 = jit_aarch64_asm:ldr(Temp2, {0, Reg}),
    I3 = jit_aarch64_asm:str(Temp2, {Y * 8, Temp1}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_vm_register(
    #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State,
    Reg,
    {fp_reg, F}
) when is_atom(Reg) ->
    I1 = jit_aarch64_asm:ldr(Temp, ?FP_REGS),
    I2 = jit_aarch64_asm:str(Reg, {F * 8, Temp}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)}.
%% @doc Move Reg[Index] (an array slot in memory) to a VM register or a native
%% register. Index may be an immediate or a (possibly {free, _}) register.
%% NOTE(review): this function emits jit_x86_64_asm instructions although the
%% surrounding module targets AArch64 — looks like a pending port; confirm.
-spec move_array_element(
    state(),
    aarch64_register(),
    non_neg_integer() | aarch64_register(),
    vm_register() | aarch64_register()
) -> state().
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {x_reg, X}
) when X < ?MAX_REG andalso is_integer(Index) ->
    I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp),
    I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {ptr, Dest}
) when is_integer(Index) ->
    I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp),
    I2 = jit_x86_64_asm:movq(Temp, {0, Dest}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
%% Fixed: this {free, Reg} clause must precede the generic source clause
%% below — the generic clause's unconstrained variable otherwise matched the
%% {free, _} tuple first, making this clause unreachable.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {free, Reg},
    Index,
    {y_reg, Y}
) when is_integer(Index) ->
    %% Reg is free after the read, so reuse it as the element scratch.
    I1 = jit_x86_64_asm:movq(?Y_REGS, Temp),
    I2 = jit_x86_64_asm:movq({Index * 8, Reg}, Reg),
    I3 = jit_x86_64_asm:movq(Reg, {Y * 8, Temp}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} =
        State,
    Reg,
    Index,
    {y_reg, Y}
) when is_integer(Index) ->
    I1 = jit_x86_64_asm:movq(?Y_REGS, Temp1),
    I2 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp2),
    I3 = jit_x86_64_asm:movq(Temp2, {Y * 8, Temp1}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
) when is_atom(Dest) andalso is_integer(Index) ->
    I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Dest),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0,
        available_fpregs = AvailableFPRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(IndexReg) ->
    %% Scale the index in place (slot size 8), then load through it.
    I1 = jit_x86_64_asm:shlq(3, IndexReg),
    I2 = jit_x86_64_asm:addq(Reg, IndexReg),
    I3 = jit_x86_64_asm:movq({0, IndexReg}, IndexReg),
    I4 = jit_x86_64_asm:movq(IndexReg, ?X_REG(X)),
    {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg
    ),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State#state{
        available_regs = AvailableRegs1,
        available_fpregs = AvailableFPRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _] = AvailableRegs0,
        used_regs = UsedRegs0,
        available_fpregs = AvailableFPRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {y_reg, Y}
) when ?IS_GPR(IndexReg) ->
    I1 = jit_x86_64_asm:movq(?Y_REGS, Temp),
    I2 = jit_x86_64_asm:shlq(3, IndexReg),
    I3 = jit_x86_64_asm:addq(Reg, IndexReg),
    I4 = jit_x86_64_asm:movq({0, IndexReg}, IndexReg),
    I5 = jit_x86_64_asm:movq(IndexReg, {Y * 8, Temp}),
    {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg
    ),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>
    ),
    State#state{
        available_regs = AvailableRegs1,
        available_fpregs = AvailableFPRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    }.

%% @doc Load reg[x] into a freshly allocated native register.
-spec get_array_element(state(), aarch64_register(), non_neg_integer()) ->
    {state(), aarch64_register()}.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ElemReg | AvailableT],
        used_regs = UsedRegs0
    } = State,
    Reg,
    Index
) ->
    %% Allocate the head of the free pool for the element and mark it used.
    I1 = jit_x86_64_asm:movq({Index * 8, Reg}, ElemReg),
    Stream1 = StreamModule:append(Stream0, I1),
    {
        State#state{
            stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
        },
        ElemReg
    }.

%% @doc Move an integer, a VM register or a native register into reg[x].
%% NOTE(review): emits jit_x86_64_asm instructions in an AArch64 module —
%% looks like a pending port; confirm.
-spec move_to_array_element(
    state(), integer() | vm_register() | aarch64_register(), aarch64_register(), non_neg_integer()
) -> state().
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {x_reg, X},
    Reg,
    Index
) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_x86_64_asm:movq(?X_REG(X), Temp),
    I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {x_reg, X},
    Reg,
    IndexReg
) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
    %% Scaled-index addressing: Reg + IndexReg * 8.
    I1 = jit_x86_64_asm:movq(?X_REG(X), Temp),
    I2 = jit_x86_64_asm:movq(Temp, {0, Reg, IndexReg, 8}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {ptr, Source},
    Reg,
    Index
) ->
    I1 = jit_x86_64_asm:movq({0, Source}, Temp),
    I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {y_reg, Y},
    Reg,
    Index
) when ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_x86_64_asm:movq(?Y_REGS, Temp),
    I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp),
    I3 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {y_reg, Y},
    Reg,
    IndexReg
) when ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
    I1 = jit_x86_64_asm:movq(?Y_REGS, Temp),
    I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp),
    I3 = jit_x86_64_asm:movq(Temp, {0, Reg, IndexReg, 8}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State, Source, Reg, Index
) when ?IS_GPR(Source) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_x86_64_asm:movq(Source, {Index * 8, Reg}),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State, Source, Reg, Index
) when ?IS_SINT32_T(Source) andalso is_integer(Index) ->
    %% Small immediates fit movq directly.
    I1 = jit_x86_64_asm:movq(Source, {Index * 8, Reg}),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Source,
    Reg,
    Index
) when is_integer(Source) andalso is_integer(Index) ->
    %% Large immediates need a scratch register (movabsq).
    I1 = jit_x86_64_asm:movabsq(Source, Temp),
    I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)}.
%% @doc Like move_to_array_element/4 but with scaled-index addressing plus a
%% byte offset: store into BaseReg[IndexReg * 8 + Offset].
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {x_reg, X},
    BaseReg,
    IndexReg,
    Offset
) when X < ?MAX_REG andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
    I1 = jit_x86_64_asm:movq(?X_REG(X), Temp),
    I2 = jit_x86_64_asm:movq(Temp, {Offset, BaseReg, IndexReg, 8}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    {y_reg, Y},
    BaseReg,
    IndexReg,
    Offset
) when ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
    I1 = jit_x86_64_asm:movq(?Y_REGS, Temp),
    I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp),
    I3 = jit_x86_64_asm:movq(Temp, {Offset, BaseReg, IndexReg, 8}),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    Source,
    BaseReg,
    IndexReg,
    Offset
) when
    ?IS_GPR(Source) andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset)
->
    I1 = jit_x86_64_asm:movq(Source, {Offset, BaseReg, IndexReg, 8}),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    Source,
    BaseReg,
    IndexReg,
    Offset
) when
    ?IS_SINT32_T(Source) andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso
        is_integer(Offset)
->
    %% Fixed: was `append(Stream0, I1 / binary)` — `/` is arithmetic division
    %% and would badarith on a binary; append the instruction directly.
    I1 = jit_x86_64_asm:movq(Source, {Offset, BaseReg, IndexReg, 8}),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_array_element(
    State,
    Source,
    BaseReg,
    IndexReg,
    Offset
) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset rem 8 =:= 0 ->
    %% Constant index: fold a slot-aligned byte offset into the index and
    %% delegate to move_to_array_element/4.
    %% Fixed: the guard used `div` (true only for Offset < 8, which made the
    %% index adjustment a no-op); slot alignment is `rem 8 =:= 0`.
    move_to_array_element(State, Source, BaseReg, IndexReg + (Offset div 8)).
%% @doc Move a value into a native register, allocating one when needed. A bare
%% register is returned as-is (no copy) — use copy_to_native_register/2 when
%% the source must be preserved.
%% NOTE(review): emits jit_x86_64_asm instructions in an AArch64 module —
%% looks like a pending port; confirm.
-spec move_to_native_register(state(), value()) -> {state(), aarch64_register()}.
move_to_native_register(State, Reg) when is_atom(Reg) ->
    {State, Reg};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
) when is_atom(Reg) ->
    %% Dereference in place: Reg := *Reg.
    I1 = jit_x86_64_asm:movq({0, Reg}, Reg),
    {State#state{stream = StreamModule:append(Stream0, I1)}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    Imm
) when is_integer(Imm) ->
    I1 = jit_x86_64_asm:movq(Imm, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, X}
) when X < ?MAX_REG ->
    I1 = jit_x86_64_asm:movq(?X_REG(X), Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {y_reg, Y}
) ->
    I1 = jit_x86_64_asm:movq(?Y_REGS, Reg),
    I2 = jit_x86_64_asm:movq({Y * 8, Reg}, Reg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        available_fpregs = [FPReg | AvailFT],
        used_regs = Used
    } = State,
    {fp_reg, F}
) ->
    I1 = jit_x86_64_asm:movq(?FP_REGS, Temp),
    I2 = jit_x86_64_asm:movsd({F * 8, Temp}, FPReg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    {State#state{stream = Stream1, available_fpregs = AvailFT, used_regs = [FPReg | Used]}, FPReg}.

%% @doc Move a value into the specific native register RegDst.
-spec move_to_native_register(state(), value(), aarch64_register()) -> state().
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
) when is_atom(RegSrc) orelse is_integer(RegSrc) ->
    I = jit_x86_64_asm:movq(RegSrc, RegDst),
    State#state{stream = StreamModule:append(Stream0, I)};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
) when is_atom(Reg) ->
    I1 = jit_x86_64_asm:movq({0, Reg}, RegDst),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
) when X < ?MAX_REG ->
    I1 = jit_x86_64_asm:movq(?X_REG(X), RegDst),
    State#state{stream = StreamModule:append(Stream0, I1)};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst
) ->
    I1 = jit_x86_64_asm:movq(?Y_REGS, RegDst),
    I2 = jit_x86_64_asm:movq({Y * 8, RegDst}, RegDst),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State,
    {fp_reg, F},
    RegDst
) ->
    I1 = jit_x86_64_asm:movq(?FP_REGS, Temp),
    I2 = jit_x86_64_asm:movsd({F * 8, Temp}, RegDst),
    State#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)}.

-spec copy_to_native_register(state(), value()) -> {state(), aarch64_register()}.
+copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + Reg +) when is_atom(Reg) -> + I1 = jit_x86_64_asm:movq(Reg, SaveReg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_x86_64_asm:movq({0, Reg}, SaveReg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register(State, Reg) -> + move_to_native_register(State, Reg). + +move_to_cp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + {y_reg, Y} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Reg, {Y * 8, Reg}), + I3 = jit_aarch64_asm:str(Reg, ?CP), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +increment_sp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + Offset +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:add(Reg, Reg, Offset * 8), + I3 = jit_aarch64_asm:str(Reg, ?Y_REGS), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. 
+ +set_continuation_to_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State, + Label +) -> + Offset = StreamModule:offset(Stream0), + {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), + Reloc = {Label, Offset + RewriteLEAOffset, 32}, + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]}. + +set_continuation_to_offset( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State +) -> + OffsetRef = make_ref(), + Offset = StreamModule:offset(Stream0), + {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), + Reloc = {OffsetRef, Offset + RewriteLEAOffset, 32}, + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. + +get_module_index( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailableT], + used_regs = UsedRegs0 + } = State +) -> + I1 = jit_x86_64_asm:movq(?JITSTATE_MODULE, Reg), + I2 = jit_x86_64_asm:movl(?MODULE_INDEX(Reg), Reg), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + { + State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, + Reg + }. + +and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + I1 = jit_x86_64_asm:andq(Val, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + I1 = jit_x86_64_asm:orq(Val, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. 
+ +add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + I1 = jit_x86_64_asm:addq(Val, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + I1 = jit_x86_64_asm:subq(Val, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +mul(State, _Reg, 1) -> + State; +mul(State, Reg, 2) -> + shift_left(State, Reg, 1); +mul(State, Reg, 4) -> + shift_left(State, Reg, 2); +mul(State, Reg, 8) -> + shift_left(State, Reg, 3); +mul(State, Reg, 16) -> + shift_left(State, Reg, 4); +mul(State, Reg, 32) -> + shift_left(State, Reg, 5); +mul(State, Reg, 64) -> + shift_left(State, Reg, 6); +mul(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + I1 = jit_x86_64_asm:imulq(Val, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +-spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). 
+decrement_reductions_and_maybe_schedule_next( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 +) -> + % Load reduction count + I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT), + % Decrement reduction count + I2 = jit_aarch64_asm:subs(Temp, Temp, 1), + % Store back the decremented value + I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT), + Stream1 = StreamModule:append(Stream0, <>), + BNEOffset = StreamModule:offset(Stream1), + % Branch if reduction count is not zero + I4 = jit_aarch64_asm:bcc(ne, 0), + % Set continuation to the next instruction + ADROffset = BNEOffset + byte_size(I4), + I5 = jit_aarch64_asm:adr(Temp, 0), + I6 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + % Append the instructions to the stream + Stream2 = StreamModule:append(Stream1, <>), + State1 = State0#state{stream = Stream2}, + State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]), + % Rewrite the branch and adr instructions + #state{stream = Stream3} = State2, + NewOffset = StreamModule:offset(Stream3), + NewI4 = jit_aarch64_asm:bcc(ne, NewOffset - BNEOffset), + NewI5 = jit_aarch64_asm:adr(Temp, NewOffset - ADROffset), + Stream4 = StreamModule:replace( + Stream3, BNEOffset, <> + ), + merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs). + +-spec call_or_schedule_next(state(), non_neg_integer()) -> state(). +call_or_schedule_next(State0, Label) -> + {State1, RewriteOffset} = set_cp(State0), + State2 = call_only_or_schedule_next(State1, Label), + rewrite_cp_offset(State2, RewriteOffset). 
+ +call_only_or_schedule_next( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = Branches + } = State0, + Label +) -> + Offset = StreamModule:offset(Stream0), + I1 = jit_x86_64_asm:decl(?JITSTATE_REDUCTIONCOUNT), + {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(-4), + I2 = jit_x86_64_asm:jz(byte_size(I3)), + Sz = byte_size(I1) + byte_size(I2), + Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]}, + State2 = set_continuation_to_label(State1, Label), + call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). + +call_primitive_with_cp(State0, Primitive, Args) -> + {State1, RewriteOffset} = set_cp(State0), + State2 = call_primitive_last(State1, Primitive, Args), + rewrite_cp_offset(State2, RewriteOffset). + +-spec set_cp(state()) -> {state(), non_neg_integer()}. +set_cp(State0) -> + % get module index (dynamically) + {#state{stream_module = StreamModule, stream = Stream0} = State1, Reg} = get_module_index( + State0 + ), + Offset = StreamModule:offset(Stream0), + % build cp with module_index << 24 + I1 = jit_x86_64_asm:shlq(24, Reg), + % next part of cp is instruction offset, after the call. + {RewriteOffset, I2} = jit_x86_64_asm:orq_rel32(0, Reg), + AddrOffset = Offset + byte_size(I1) + RewriteOffset, + I3 = jit_x86_64_asm:movq(Reg, ?CP), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State2 = State1#state{stream = Stream1}, + State3 = free_native_register(State2, Reg), + {State3, AddrOffset}. + +-spec rewrite_cp_offset(state(), non_neg_integer()) -> state(). 
+rewrite_cp_offset( + #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0, + RewriteOffset +) -> + NewOffset = StreamModule:offset(Stream0) - CodeOffset, + % Encode ReturnAddrOffset << 2 + Stream1 = StreamModule:replace(Stream0, RewriteOffset, <<(NewOffset bsl 2):32/little>>), + State0#state{stream = Stream1}. + +set_bs(#state{stream_module = StreamModule, stream = Stream0} = State0, TermReg) -> + I1 = jit_x86_64_asm:movq(TermReg, ?BS), + I2 = jit_x86_64_asm:movq(0, ?BS_OFFSET), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param SortedLabels labels information, sorted by offset +%% @param SortedLines line information, sorted by offset +%% @doc Build labels and line tables and encode a function that returns it. +%% In this case, the function returns the effective address of what immediately +%% follows. +%% @end +%% @return New state +%%----------------------------------------------------------------------------- +return_labels_and_lines( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + SortedLabels, + SortedLines +) -> + I2 = jit_x86_64_asm:retq(), + {_RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({byte_size(I2), rip}, rax), + LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, + LinesTable = <<<> || {Line, Offset} <- SortedLines>>, + Stream1 = StreamModule:append( + Stream0, + <> + ), + State#state{stream = Stream1}. 
+ +free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> + AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), + true = lists:member(Reg, UsedRegs0), + UsedRegs1 = lists:delete(Reg, UsedRegs0), + {AvailableRegs1, AvailableFPRegs0, UsedRegs1}; +free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_FPR(Reg) -> + AvailableFPRegs1 = free_reg0(?AVAILABLE_FPREGS, AvailableFPRegs0, Reg, []), + true = lists:member(Reg, UsedRegs0), + UsedRegs1 = lists:delete(Reg, UsedRegs0), + {AvailableRegs0, AvailableFPRegs1, UsedRegs1}. + +free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> + lists:reverse(Acc, [Reg | PrevRegs0]); +free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> + free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); +free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> + free_reg0(SortedT, PrevRegs, Reg, Acc). + +args_regs(Args) -> + lists:map( + fun + ({free, {ptr, Reg}}) -> Reg; + ({free, Reg}) when is_atom(Reg) -> Reg; + ({free, Imm}) when is_integer(Imm) -> imm; + (offset) -> imm; + (ctx) -> ?CTX_REG; + (jit_state) -> ?JITSTATE_REG; + (Reg) when is_atom(Reg) -> Reg; + (Imm) when is_integer(Imm) -> imm; + ({ptr, Reg}) -> Reg; + ({x_reg, _}) -> ?CTX_REG; + ({y_reg, _}) -> ?CTX_REG; + ({fp_reg, _}) -> ?CTX_REG; + ({free, {x_reg, _}}) -> ?CTX_REG; + ({free, {y_reg, _}}) -> ?CTX_REG; + ({free, {fp_reg, _}}) -> ?CTX_REG + end, + Args + ). diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl new file mode 100644 index 000000000..dee323452 --- /dev/null +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -0,0 +1,822 @@ +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + +-module(jit_aarch64_asm). + +-export([ + add/3, + b/1, + bcc/2, + blr/1, + br/1, + brk/1, + cmp/2, + cmp32/2, + and_reg/3, + ldr/2, + ldr_x/3, + lsl/3, + lsr/3, + mov/2, + movk/3, + movz/3, + orr/3, + patch_b_offset/2, + patch_bcc_offset/2, + ret/0, + str/2, + str_x/3, + tst/2, + tst32/2, + stp_x/4, + ldp_x/4, + subs/3, + adr/2 +]). + +-type aarch64_gpr_register() :: + r0 + | r1 + | r2 + | r3 + | r4 + | r5 + | r6 + | r7 + | r8 + | r9 + | r10 + | r11 + | r12 + | r13 + | r14 + | r15 + | xzr. + +%% Emit an ADD instruction (AArch64 encoding) +%% ADD Rd, Rn, #imm - adds immediate value to register +-spec add(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary(). +add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + %% AArch64 ADD (immediate) encoding: 1001000100iiiiiiiiiiiinnnnndddddd + %% 0x91000000 | Imm << 10 | Rn << 5 | Rd + <<(16#91000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>. + +%% Emit an unconditional branch (B) to a 32-bit relative offset (AArch64 encoding) +%% offset is in bytes, relative to the next instruction +-spec b(integer()) -> binary(). +b(Offset) when is_integer(Offset) -> + %% AArch64 B encoding: 0b000101 | imm26 | 00000 + %% imm26 is (Offset / 4) signed, fits in 26 bits + <<(16#14000000 bor (Offset div 4)):32/little>>. 
+ +%% Emit a breakpoint (BRK) instruction with immediate (AArch64 encoding) +%% imm is a 16-bit immediate value (usually 0 for debuggers) +-spec brk(integer()) -> binary(). +brk(Imm) when is_integer(Imm), Imm >= 0, Imm =< 16#FFFF -> + %% AArch64 BRK encoding: 11010100 00100000 00000000 iiiiiiii iiiiiiii + %% 0xd4200000 | (Imm << 5) + <<(16#D4200000 bor ((Imm band 16#FFFF) bsl 5)):32/little>>. + +%% Emit a branch with link register (BLR) instruction (AArch64 encoding) +%% Register is the register atom (r0-r15) +-spec blr(aarch64_gpr_register()) -> binary(). +blr(Reg) when is_atom(Reg) -> + RegNum = reg_to_num(Reg), + %% AArch64 BLR encoding: 1101011000111111000000rrrrr00000 + %% 0xd63f0000 | (Reg << 5) + <<(16#D63F0000 bor (RegNum bsl 5)):32/little>>. + +%% Emit a branch register (BR) instruction (AArch64 encoding) +%% Register is the register atom (r0-r15) +-spec br(aarch64_gpr_register()) -> binary(). +br(Reg) when is_atom(Reg) -> + RegNum = reg_to_num(Reg), + %% AArch64 BR encoding: 1101011000011111000000rrrrr00000 + %% 0xd61f0000 | (Reg << 5) + <<(16#D61F0000 bor (RegNum bsl 5)):32/little>>. + +%% Emit a load register (LDR) instruction for 64-bit load from memory (AArch64 encoding) +%% Dst is destination register atom, Src is {Offset, BaseReg} tuple +-spec ldr(aarch64_gpr_register(), {integer(), aarch64_gpr_register()}) -> binary(). +ldr(Dst, {Offset, BaseReg}) when + is_atom(Dst), + is_atom(BaseReg), + is_integer(Offset), + Offset >= 0, + Offset =< 32760, + (Offset rem 8) =:= 0 +-> + DstNum = reg_to_num(Dst), + BaseRegNum = reg_to_num(BaseReg), + %% AArch64 LDR (immediate) encoding for 64-bit: 11111001010iiiiiiiiiiibbbbbttttt + %% 0xf9400000 | (Offset div 8) << 10 | BaseReg << 5 | Dst + << + (16#F9400000 bor ((Offset div 8) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little + >>. 
+ +%% Emit a move immediate (MOV) instruction for various immediate sizes (AArch64 encoding) +%% Dst is destination register atom, Imm is immediate value +%% Returns a binary that may contain multiple instructions for complex immediates +-spec mov(aarch64_gpr_register(), integer() | aarch64_gpr_register()) -> binary(). +mov(Dst, Imm) when is_atom(Dst), is_integer(Imm) -> + mov_immediate(Dst, Imm); +mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> + orr(Rd, xzr, Rm). + +%% Helper function to encode immediate values using optimal instruction sequence +-spec mov_immediate(aarch64_gpr_register(), integer()) -> binary(). +mov_immediate(Dst, Imm) when Imm >= 0, Imm =< 16#FFFF -> + %% Simple 16-bit positive immediate + movz(Dst, Imm, 0); +mov_immediate(Dst, Imm) when Imm < 0, Imm >= -16#FFFF -> + %% Simple 16-bit negative immediate using MOVN + DstNum = reg_to_num(Dst), + <<(16#92800000 bor (((-Imm - 1) band 16#FFFF) bsl 5) bor DstNum):32/little>>; +mov_immediate(Dst, Imm) when Imm >= 0 -> + %% Complex positive immediate - build with MOVZ + MOVK sequence + build_positive_immediate(Dst, Imm); +mov_immediate(Dst, Imm) when Imm < 0 -> + %% Complex negative immediate - try MOVN approach first + build_negative_immediate(Dst, Imm). + +%% Build positive immediate using MOVZ + MOVK sequence +-spec build_positive_immediate(aarch64_gpr_register(), integer()) -> binary(). 
+build_positive_immediate(Dst, Imm) -> + %% First try simple MOVZ/MOVK sequence for values with few non-zero chunks + Chunks = [ + Imm band 16#FFFF, + (Imm bsr 16) band 16#FFFF, + (Imm bsr 32) band 16#FFFF, + (Imm bsr 48) band 16#FFFF + ], + NonZeroChunks = length([C || C <- Chunks, C =/= 0]), + + if + NonZeroChunks =< 2 -> + %% Use simple MOVZ/MOVK sequence for 1-2 chunks + build_immediate_sequence(Dst, Chunks); + true -> + %% For complex values, try bitmask immediate first + case encode_bitmask_immediate(Imm) of + {ok, N, Immr, Imms} -> + %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm) + orr_immediate(Dst, N, Immr, Imms); + error -> + %% Fallback to multi-instruction sequence + build_immediate_sequence(Dst, Chunks) + end + end. + +%% Build negative immediate using MOVN or fallback to positive approach +-spec build_negative_immediate(aarch64_gpr_register(), integer()) -> binary(). +build_negative_immediate(Dst, Imm) -> + %% First try to encode as bitmask immediate with ORR + case encode_bitmask_immediate(Imm) of + {ok, N, Immr, Imms} -> + %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm) + orr_immediate(Dst, N, Immr, Imms); + error -> + %% Fallback to multi-instruction sequence + build_positive_immediate(Dst, Imm band 16#FFFFFFFFFFFFFFFF) + end. + +%% Build instruction sequence from chunks +-spec build_immediate_sequence(aarch64_gpr_register(), [integer()]) -> binary(). +build_immediate_sequence(Dst, [C0, C1, C2, C3]) -> + %% Find the first non-zero chunk to start with MOVZ + case find_first_nonzero_chunk([C0, C1, C2, C3]) of + {Index, Value} -> + First = movz(Dst, Value, Index * 16), + Rest = build_movk_sequence(Dst, [C0, C1, C2, C3], Index), + <>; + none -> + %% All chunks are zero + movz(Dst, 0, 0) + end. + +%% Find the first non-zero chunk +-spec find_first_nonzero_chunk([integer()]) -> {integer(), integer()} | none. +find_first_nonzero_chunk(Chunks) -> + find_first_nonzero_chunk(Chunks, 0). 
+ +find_first_nonzero_chunk([], _) -> none; +find_first_nonzero_chunk([0 | Rest], Index) -> find_first_nonzero_chunk(Rest, Index + 1); +find_first_nonzero_chunk([Chunk | _], Index) -> {Index, Chunk}. + +%% Build MOVK sequence for remaining non-zero chunks +-spec build_movk_sequence(aarch64_gpr_register(), [integer()], integer()) -> binary(). +build_movk_sequence(Dst, Chunks, SkipIndex) -> + build_movk_sequence(Dst, Chunks, SkipIndex, 0, <<>>). + +build_movk_sequence(_, [], _, _, Acc) -> + Acc; +build_movk_sequence(Dst, [Chunk | Rest], SkipIndex, CurrentIndex, Acc) -> + NewAcc = + if + CurrentIndex =:= SkipIndex orelse Chunk =:= 0 -> + Acc; + true -> + MovkInstr = movk(Dst, Chunk, CurrentIndex * 16), + <> + end, + build_movk_sequence(Dst, Rest, SkipIndex, CurrentIndex + 1, NewAcc). + +%% Emit a MOVZ instruction (move with zero) +-spec movz(aarch64_gpr_register(), integer(), integer()) -> binary(). +movz(Dst, Imm, Shift) when + is_atom(Dst), + is_integer(Imm), + Imm >= 0, + Imm =< 16#FFFF, + Shift rem 16 =:= 0, + Shift >= 0, + Shift =< 48 +-> + DstNum = reg_to_num(Dst), + Hw = Shift div 16, + %% AArch64 MOVZ encoding: 1101001000hwiiiiiiiiiiiiiiiiibbbbb + <<(16#D2800000 bor (Hw bsl 21) bor ((Imm band 16#FFFF) bsl 5) bor DstNum):32/little>>. + +%% Emit a MOVK instruction (move with keep) +-spec movk(aarch64_gpr_register(), integer(), integer()) -> binary(). +movk(Dst, Imm, Shift) when + is_atom(Dst), + is_integer(Imm), + Imm >= 0, + Imm =< 16#FFFF, + Shift rem 16 =:= 0, + Shift >= 0, + Shift =< 48 +-> + DstNum = reg_to_num(Dst), + Hw = Shift div 16, + %% AArch64 MOVK encoding: 1111001000hwiiiiiiiiiiiiiiiiibbbbb + <<(16#F2800000 bor (Hw bsl 21) bor ((Imm band 16#FFFF) bsl 5) bor DstNum):32/little>>. + +%% Emit an ORR immediate instruction (used for MOV with bitmask immediates) +-spec orr_immediate(aarch64_gpr_register(), integer(), integer(), integer()) -> binary(). 
+orr_immediate(Dst, N, Immr, Imms) when + is_atom(Dst), + N >= 0, + N =< 1, + Immr >= 0, + Immr =< 63, + Imms >= 0, + Imms =< 63 +-> + DstNum = reg_to_num(Dst), + %% AArch64 ORR (immediate) encoding: sf 01 100100 N immr imms Rn Rd + %% For MOV Rd, #imm: ORR Rd, XZR, #imm (Rn = 31) + + % 64-bit operation + Sf = 1, + << + ((Sf bsl 31) bor (16#32000000) bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor + (31 bsl 5) bor DstNum):32/little + >>. + +%% Encode a value as AArch64 bitmask immediate +%% Returns {ok, N, Immr, Imms} if encodable, error otherwise +-spec encode_bitmask_immediate(integer()) -> {ok, integer(), integer(), integer()} | error. +encode_bitmask_immediate(Value) when is_integer(Value) -> + %% Convert to 64-bit unsigned + UnsignedValue = Value band 16#FFFFFFFFFFFFFFFF, + + %% Try different pattern sizes (64, 32, 16, 8, 4, 2) + PatternSizes = [64, 32, 16, 8, 4, 2], + try_pattern_sizes(UnsignedValue, PatternSizes). + +%% Try encoding with different pattern sizes +-spec try_pattern_sizes(integer(), [integer()]) -> {ok, integer(), integer(), integer()} | error. +try_pattern_sizes(_, []) -> + error; +try_pattern_sizes(Value, [Size | Rest]) -> + case try_encode_pattern_size(Value, Size) of + {ok, N, Immr, Imms} -> {ok, N, Immr, Imms}; + error -> try_pattern_sizes(Value, Rest) + end. + +%% Try to encode value with a specific pattern size +-spec try_encode_pattern_size(integer(), integer()) -> + {ok, integer(), integer(), integer()} | error. +try_encode_pattern_size(Value, Size) -> + %% Extract the pattern of the given size + Mask = (1 bsl Size) - 1, + Pattern = Value band Mask, + + %% Check if the value is just this pattern repeated + case is_repeating_pattern(Value, Pattern, Size) of + true -> try_encode_single_pattern(Pattern, Size); + false -> error + end. + +%% Check if value consists of pattern repeated +-spec is_repeating_pattern(integer(), integer(), integer()) -> boolean(). 
+is_repeating_pattern(Value, Pattern, Size) -> + is_repeating_pattern(Value, Pattern, Size, 0). + +is_repeating_pattern(0, 0, _, _) -> + true; +is_repeating_pattern(Value, Pattern, Size, Pos) when Pos < 64 -> + Mask = (1 bsl Size) - 1, + CurrentPattern = (Value bsr Pos) band Mask, + case CurrentPattern of + Pattern when (Value bsr (Pos + Size)) =:= 0 -> true; + Pattern -> is_repeating_pattern(Value, Pattern, Size, Pos + Size); + _ -> false + end; +is_repeating_pattern(_, _, _, _) -> + false. + +%% Try to encode a single pattern as bitmask immediate +-spec try_encode_single_pattern(integer(), integer()) -> + {ok, integer(), integer(), integer()} | error. +try_encode_single_pattern(Pattern, Size) -> + %% Find runs of consecutive 1s and 0s + case find_single_run_of_ones(Pattern, Size) of + {ok, OnesCount, StartPos} -> + %% Calculate N, Immr, Imms + N = + case Size of + 64 -> 1; + 32 -> 0; + 16 -> 0; + 8 -> 0; + 4 -> 0; + 2 -> 0 + end, + + %% For N=0 patterns, we need to encode the size in imms + Imms = + case Size of + 64 -> OnesCount - 1; + 32 -> (1 bsl 5) bor (OnesCount - 1); + 16 -> (1 bsl 4) bor (OnesCount - 1); + 8 -> (1 bsl 3) bor (OnesCount - 1); + 4 -> (1 bsl 2) bor (OnesCount - 1); + 2 -> (1 bsl 1) bor (OnesCount - 1) + end, + + %% immr is the rotation amount (negate of start position) + Immr = (-StartPos) band (Size - 1), + + {ok, N, Immr, Imms}; + error -> + error + end. + +%% Find a single run of consecutive 1s in the pattern +-spec find_single_run_of_ones(integer(), integer()) -> {ok, integer(), integer()} | error. +find_single_run_of_ones(Pattern, Size) -> + %% Convert to binary string for easier analysis + Bits = [(Pattern bsr I) band 1 || I <- lists:seq(0, Size - 1)], + find_ones_run(Bits, 0, 0, 0, none). 
+ +find_ones_run([], _, OnesCount, StartPos, in_ones) when OnesCount > 0 -> + %% Reached end while in ones run + {ok, OnesCount, StartPos}; +find_ones_run([], _, _, _, _) -> + error; +find_ones_run([1 | Rest], Pos, 0, _, none) -> + %% Start of ones run + find_ones_run(Rest, Pos + 1, 1, Pos, in_ones); +find_ones_run([1 | Rest], Pos, OnesCount, StartPos, in_ones) -> + %% Continue ones run + find_ones_run(Rest, Pos + 1, OnesCount + 1, StartPos, in_ones); +find_ones_run([0 | Rest], _Pos, OnesCount, StartPos, in_ones) -> + %% End of ones run - make sure rest are zeros (single run only) + case lists:all(fun(X) -> X =:= 0 end, Rest) of + true -> {ok, OnesCount, StartPos}; + %% Multiple runs not supported in simple encoding + false -> error + end; +find_ones_run([0 | Rest], Pos, OnesCount, StartPos, none) -> + %% Still looking for start of ones run + find_ones_run(Rest, Pos + 1, OnesCount, StartPos, none). + +%% Emit an ORR instruction (AArch64 encoding) +%% ORR Rd, Rn, Rm - performs bitwise OR of Rn and Rm, storing result in Rd +%% Special cases: ORR Rd, XZR, Rm is equivalent to MOV Rd, Rm +-spec orr(aarch64_gpr_register(), aarch64_gpr_register() | xzr, aarch64_gpr_register()) -> binary(). +orr(DstReg, xzr, SrcReg) when is_atom(DstReg), is_atom(SrcReg) -> + %% ORR Rd, XZR, Rm - equivalent to MOV Rd, Rm + SrcNum = reg_to_num(SrcReg), + DstNum = reg_to_num(DstReg), + %% AArch64 ORR (shifted register) encoding: Rd = Rm (with XZR as Rn) + %% 10101010000mmmmm000000nnnnndddddd (64-bit) + %% 0xaa000000 | Rm << 16 | Rn << 5 | Rd (where Rn = 31 for XZR) + <<(16#AA0003E0 bor (SrcNum bsl 16) bor DstNum):32/little>>; +orr(DstReg, Rn, Rm) when is_atom(DstReg), is_atom(Rn), is_atom(Rm) -> + %% General ORR Rd, Rn, Rm + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + DstNum = reg_to_num(DstReg), + %% AArch64 ORR (shifted register) encoding: + %% 10101010000mmmmm000000nnnnndddddd (64-bit) + << + (16#AA000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor DstNum):32/little + >>. 
+ +%% Emit a store register (STR) instruction for 64-bit store to memory +-spec str(aarch64_gpr_register(), {integer(), aarch64_gpr_register()}) -> binary(). +str(SrcReg, {Offset, BaseReg}) when + is_atom(SrcReg), + is_atom(BaseReg), + is_integer(Offset), + Offset >= 0, + Offset =< 32760, + (Offset rem 8) =:= 0 +-> + SrcNum = reg_to_num(SrcReg), + BaseNum = reg_to_num(BaseReg), + %% AArch64 STR (immediate) encoding for 64-bit: 11111001000iiiiiiiiiiibbbbbttttt + %% 0xf9000000 | (Offset div 8) << 10 | BaseReg << 5 | SrcReg + << + (16#F9000000 bor ((Offset div 8) bsl 10) bor (BaseNum bsl 5) bor SrcNum):32/little + >>. + +%% Emit a store register (STR) instruction for 64-bit store to memory, with store-update (writeback) +-spec str_x + (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary(); + (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary(). +str_x(Reg, {Base, Imm}, '!') when + is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 +-> + RegNum = reg_to_num(Reg), + BaseNum = reg_to_num(Base), + <<(16#F8000C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>; +str_x(Reg, {Base}, Imm) when + is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 +-> + RegNum = reg_to_num(Reg), + BaseNum = reg_to_num(Base), + <<(16#F8000400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>. + +%% Emit a load register (LDR) instruction for 64-bit store to memory, with store-update (writeback) +-spec ldr_x + (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary(); + (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary(). 
+ldr_x(Reg, {Base, Imm}, '!') when + is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 +-> + RegNum = reg_to_num(Reg), + BaseNum = reg_to_num(Base), + <<(16#F8400C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>; +ldr_x(Reg, {Base}, Imm) when + is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 +-> + RegNum = reg_to_num(Reg), + BaseNum = reg_to_num(Base), + <<(16#F8400400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>. + +%% Emit a store pair (STP) instruction for 64-bit registers +%% stp_x(Rn, Rm, {Base}, Imm) -> binary() +%% stp_x(Rn, Rm, {Base, Imm}, '!') -> binary() (store-update) +-spec stp_x( + aarch64_gpr_register(), + aarch64_gpr_register(), + {aarch64_gpr_register()} | {aarch64_gpr_register(), integer()}, + integer() | '!' +) -> binary(). +stp_x(Rn, Rm, {Base}, Imm) when + is_atom(Rn), + is_atom(Rm), + is_atom(Base), + is_integer(Imm), + Imm >= -512, + Imm =< 504, + (Imm rem 8) =:= 0 +-> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + BaseNum = reg_to_num(Base), + %% STP encoding: 1010100010|imm7|base|rm|rn + %% 0xa9bf0000 | ((Imm div 8) band 0x7f) << 15 | Base << 5 | Rm << 10 | Rn + << + (16#A8800000 bor ((Imm div 8) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor RnNum):32/little + >>; +stp_x(Rn, Rm, {Base, Imm}, '!') when + is_atom(Rn), + is_atom(Rm), + is_atom(Base), + is_integer(Imm), + Imm >= -512, + Imm =< 504, + (Imm rem 8) =:= 0 +-> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + BaseNum = reg_to_num(Base), + << + (16#A9800000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor + RnNum):32/little + >>. + +%% Emit a load pair (LDP) instruction for 64-bit registers +%% ldp_x(Rn, Rm, {Base}, Imm) -> binary() +-spec ldp_x(aarch64_gpr_register(), aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> + binary(). 
+ldp_x(Rn, Rm, {Base}, Imm) when + is_atom(Rn), + is_atom(Rm), + is_atom(Base), + is_integer(Imm), + Imm >= -512, + Imm =< 504, + (Imm rem 8) =:= 0 +-> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + BaseNum = reg_to_num(Base), + %% LDP encoding: 1010100011|imm7|base|rm|rn + << + (16#A8C00000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor + RnNum):32/little + >>. + +%%----------------------------------------------------------------------------- +%% Helper functions +%%----------------------------------------------------------------------------- + +%% Convert register atoms to register numbers for assembly generation +%% for r0 to r30 +reg_to_num(r0) -> 0; +reg_to_num(r1) -> 1; +reg_to_num(r2) -> 2; +reg_to_num(r3) -> 3; +reg_to_num(r4) -> 4; +reg_to_num(r5) -> 5; +reg_to_num(r6) -> 6; +reg_to_num(r7) -> 7; +reg_to_num(r8) -> 8; +reg_to_num(r9) -> 9; +reg_to_num(r10) -> 10; +reg_to_num(r11) -> 11; +reg_to_num(r12) -> 12; +reg_to_num(r13) -> 13; +reg_to_num(r14) -> 14; +reg_to_num(r15) -> 15; +reg_to_num(r16) -> 16; +reg_to_num(r17) -> 17; +reg_to_num(r18) -> 18; +reg_to_num(r19) -> 19; +reg_to_num(r20) -> 20; +reg_to_num(r21) -> 21; +reg_to_num(r22) -> 22; +reg_to_num(r23) -> 23; +reg_to_num(r24) -> 24; +reg_to_num(r25) -> 25; +reg_to_num(r26) -> 26; +reg_to_num(r27) -> 27; +reg_to_num(r28) -> 28; +reg_to_num(r29) -> 29; +reg_to_num(r30) -> 30; +%% Stack pointer (SP) is r31 +reg_to_num(sp) -> 31; +%% Zero register (XZR) is also r31 +reg_to_num(xzr) -> 31. + +%% Emit a conditional branch instruction +-spec bcc(atom(), integer()) -> binary(). 
+bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> + CondNum = + case Cond of + % Equal (Z set) + eq -> 0; + % Not equal (Z clear) + ne -> 1; + % Carry set + cs -> 2; + % Carry clear + cc -> 3; + % Minus (N set) + mi -> 4; + % Plus (N clear) + pl -> 5; + % Overflow set + vs -> 6; + % Overflow clear + vc -> 7; + % Higher (unsigned) + hi -> 8; + % Lower or same (unsigned) + ls -> 9; + % Greater than or equal (signed) + ge -> 10; + % Less than (signed) + lt -> 11; + % Greater than (signed) + gt -> 12; + % Less than or equal (signed) + le -> 13; + % Always + al -> 14; + % Never + nv -> 15 + end, + <<(16#54000000 bor ((Offset div 4) bsl 5) bor CondNum):32/little>>. + +%% Emit a compare instruction +-spec cmp(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). +cmp(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + %% AArch64 CMP (shifted register) encoding: CMP Rn, Rm + %% This is SUBS XZR, Rn, Rm: 11101011000mmmmm000000nnnnn11111 + <<(16#EB00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; +cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> + RnNum = reg_to_num(Rn), + %% AArch64 CMP (immediate) encoding: CMP Rn, #imm + %% This is SUBS XZR, Rn, #imm: 1111000100iiiiiiiiiiiinnnnn11111 + <<(16#F100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>. + +%% Emit a 32-bit compare instruction +-spec cmp32(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). 
+cmp32(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    %% AArch64 CMP (32-bit shifted register) encoding: CMP Wn, Wm
+    %% This is SUBS WZR, Wn, Wm: 01101011000mmmmm000000nnnnn11111
+    <<(16#6B00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
+cmp32(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
+    RnNum = reg_to_num(Rn),
+    %% AArch64 CMP (32-bit immediate) encoding: CMP Wn, #imm
+    %% This is SUBS WZR, Wn, #imm: 0111000100iiiiiiiiiiiinnnnn11111
+    <<(16#7100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>.
+
+%% Emit an AND instruction (bitwise AND)
+-spec and_reg(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() | integer()) ->
+    binary().
+and_reg(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    %% AArch64 AND (shifted register) encoding: AND Rd, Rn, Rm
+    %% 10001010000mmmmm000000nnnnnddddd (64-bit)
+    <<
+        (16#8A000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little
+    >>;
+and_reg(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) ->
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    case Imm of
+        %% special case for #192
+        192 ->
+            <<(16#927A0420 bor (RnNum bsl 5) bor RdNum):32/little>>;
+        _ ->
+            <<
+                (16#92000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little
+            >>
+    end.
+
+%% Emit a logical shift left instruction
+-spec lsl(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
+lsl(Rd, Rn, Shift) when is_atom(Rd), is_atom(Rn), is_integer(Shift), Shift >= 0, Shift =< 63 ->
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    %% AArch64 LSL (immediate) encoding: LSL Rd, Rn, #shift
+    %% This is UBFM Rd, Rn, #(-shift MOD 64), #(63-shift): 1101001101rrrrrrssssssnnnnnddddd
+    NegShift = (-Shift) band 63,
+    Width = 63 - Shift,
+    <<
+        (16#D3400000 bor ((NegShift band 16#3F) bsl 16) bor ((Width band 16#3F) bsl 10) bor
+            (RnNum bsl 5) bor RdNum):32/little
+    >>.
+
+%% Emit a logical shift right instruction
+-spec lsr(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
+lsr(Rd, Rn, Shift) when is_atom(Rd), is_atom(Rn), is_integer(Shift), Shift >= 0, Shift =< 63 ->
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    %% AArch64 LSR (immediate) encoding: LSR Rd, Rn, #shift
+    %% This is UBFM Rd, Rn, #shift, #63: 1101001101rrrrrr111111nnnnnddddd
+    <<
+        (16#D340FC00 bor ((Shift band 16#3F) bsl 16) bor (RnNum bsl 5) bor RdNum):32/little
+    >>.
+
+%% Emit a return instruction
+-spec ret() -> binary().
+ret() ->
+    %% AArch64 RET encoding: RET (defaults to X30/LR)
+    %% 11010110010111110000001111000000
+    <<16#D65F03C0:32/little>>.
+
+%% Emit a test instruction (bitwise AND, discarding result)
+-spec tst(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
+tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + %% AArch64 TST (shifted register) encoding: TST Rn, Rm + %% This is ANDS XZR, Rn, Rm: 11101010000mmmmm000000nnnnn11111 + <<(16#EA00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; +tst(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> + RnNum = reg_to_num(Rn), + case Imm of + %% special case for #16 + 16 -> + <<(16#F27C001F bor (RnNum bsl 5)):32/little>>; + _ -> + if + Imm band (Imm - 1) =:= 0, Imm > 0, Imm =< 16#8000000000000000 -> + BitPos = trunc(math:log2(Imm)), + <<(16#F200001F bor (BitPos bsl 16) bor (RnNum bsl 5)):32/little>>; + true -> + << + (16#F200001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little + >> + end + end. + +%% Emit a 32-bit test instruction (bitwise AND, discarding result) +-spec tst32(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). +tst32(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + %% AArch64 TST (32-bit shifted register) encoding: TST Wn, Wm + %% This is ANDS WZR, Wn, Wm: 01101010000mmmmm000000nnnnn11111 + <<(16#6A00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; +tst32(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> + RnNum = reg_to_num(Rn), + case Imm of + %% special case for #16 + 16 -> + <<(16#721C001F bor (RnNum bsl 5)):32/little>>; + _ -> + if + Imm band (Imm - 1) =:= 0, Imm > 0, Imm =< 16#80000000 -> + BitPos = trunc(math:log2(Imm)), + <<(16#7200001F bor (BitPos bsl 16) bor (RnNum bsl 5)):32/little>>; + true -> + << + (16#7200001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little + >> + end + end. + +%% Patch an unconditional branch (B) instruction with a new offset +%% Takes the previous 32-bit instruction and a new offset, returns the patched instruction +-spec patch_b_offset(binary(), integer()) -> binary(). 
+patch_b_offset(<<PrevInstr:32/little>>, NewOffset) when is_integer(NewOffset) ->
+    Opcode = PrevInstr band 16#FC000000,
+    case Opcode of
+        16#14000000 ->
+            <<(16#14000000 bor (NewOffset div 4)):32/little>>;
+        _ ->
+            <<PrevInstr:32/little>>
+    end.
+
+%% Patch a conditional branch (B.cond) instruction with a new offset
+%% Takes the previous 32-bit instruction and a new offset, returns the patched instruction
+-spec patch_bcc_offset(binary(), integer()) -> binary().
+patch_bcc_offset(<<PrevInstr:32/little>>, NewOffset) when is_integer(NewOffset) ->
+    Opcode = PrevInstr band 16#FF000000,
+    Cond = PrevInstr band 16#0000000F,
+    case Opcode of
+        16#54000000 ->
+            <<(16#54000000 bor ((NewOffset div 4) bsl 5) bor Cond):32/little>>;
+        _ ->
+            <<PrevInstr:32/little>>
+    end.
+
+%% Emit a subtract and set flags (SUBS) instruction (AArch64 encoding)
+%% SUBS Rd, Rn, Rm/imm - subtracts and sets condition flags
+-spec subs(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) ->
+    binary().
+subs(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    %% AArch64 SUBS (immediate): 1111000100iiiiiiiiiiiinnnnnddddd
+    <<(16#F1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
+subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    %% AArch64 SUBS (register): 11101011000mmmmm000000nnnnnddddd
+    <<(16#EB000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little>>.
+
+%% Emit an ADR (PC-relative address) instruction (AArch64 encoding)
+%% Dst is destination register atom, Offset is signed immediate (in bytes, -1MB..+1MB)
+-spec adr(aarch64_gpr_register(), integer()) -> binary().
+adr(Dst, Imm) when is_atom(Dst), is_integer(Imm), Imm >= -1048576, Imm =< 1048572 ->
+    DstNum = reg_to_num(Dst),
+    ImmLo = Imm band 3,
+    ImmHi = Imm bsr 2,
+    Word = (16#10000000) bor (ImmLo bsl 29) bor ((ImmHi band 16#7FFFF) bsl 5) bor DstNum,
+    <<Word:32/little>>.
diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index 05d500045..a3d8a8fb7 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -154,6 +154,7 @@ enum TrapAndLoadResult #define JIT_FORMAT_VERSION 1 #define JIT_ARCH_X86_64 1 +#define JIT_ARCH_AARCH64 2 #define JIT_VARIANT_PIC 1 diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 9dbe75452..70f46ccc0 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -26,6 +26,8 @@ set(ERLANG_MODULES tests jit_tests jit_tests_common + jit_aarch64_tests + jit_aarch64_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl new file mode 100644 index 000000000..31237f186 --- /dev/null +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -0,0 +1,300 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64_asm_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +add_test_() -> + [ + ?_assertEqual(<<16#9100e0e7:32/little>>, jit_aarch64_asm:add(r7, r7, 56)), + ?_assertEqual(<<16#91000000:32/little>>, jit_aarch64_asm:add(r0, r0, 0)), + ?_assertEqual(<<16#91000421:32/little>>, jit_aarch64_asm:add(r1, r1, 1)) + ]. 
+ +b_test_() -> + [ + ?_assertEqual(<<16#14000000:32/little>>, jit_aarch64_asm:b(0)), + ?_assertEqual(<<16#14000004:32/little>>, jit_aarch64_asm:b(16)), + ?_assertEqual(<<16#14000001:32/little>>, jit_aarch64_asm:b(4)) + ]. + +brk_test_() -> + [ + ?_assertEqual(<<16#D4200000:32/little>>, jit_aarch64_asm:brk(0)), + ?_assertEqual(<<16#D4201900:32/little>>, jit_aarch64_asm:brk(200)) + ]. + +blr_test_() -> + [ + ?_assertEqual(<<16#D63F0000:32/little>>, jit_aarch64_asm:blr(r0)), + ?_assertEqual(<<16#D63F0020:32/little>>, jit_aarch64_asm:blr(r1)), + ?_assertEqual(<<16#D63F01A0:32/little>>, jit_aarch64_asm:blr(r13)) + ]. + +br_test_() -> + [ + ?_assertEqual(<<16#D61F0000:32/little>>, jit_aarch64_asm:br(r0)), + ?_assertEqual(<<16#D61F0020:32/little>>, jit_aarch64_asm:br(r1)), + ?_assertEqual(<<16#D61F01A0:32/little>>, jit_aarch64_asm:br(r13)) + ]. + +ldr_test_() -> + [ + ?_assertEqual(<<16#F9400421:32/little>>, jit_aarch64_asm:ldr(r1, {8, r1})), + ?_assertEqual(<<16#F9403042:32/little>>, jit_aarch64_asm:ldr(r2, {96, r2})) + ]. 
+ +mov_test_() -> + [ + % mov immediate - simple cases + ?_assertEqual(<<16#D2800000:32/little>>, jit_aarch64_asm:mov(r0, 0)), + ?_assertEqual(<<16#D2801901:32/little>>, jit_aarch64_asm:mov(r1, 200)), + ?_assertEqual(<<16#d28000b3:32/little>>, jit_aarch64_asm:mov(r19, 5)), + ?_assertEqual(<<16#92800094:32/little>>, jit_aarch64_asm:mov(r20, -5)), + ?_assertEqual(<<16#d2800015:32/little>>, jit_aarch64_asm:mov(r21, 0)), + ?_assertEqual(<<16#d29ffff0:32/little>>, jit_aarch64_asm:mov(r16, 16#FFFF)), + ?_assertEqual(<<16#929fffcf:32/little>>, jit_aarch64_asm:mov(r15, -16#FFFF)), + + % mov immediate - complex cases requiring multiple instructions + ?_assertEqual(<<16#d2a00052:32/little>>, jit_aarch64_asm:mov(r18, 16#20000)), + ?_assertEqual(<<16#b26fbbf1:32/little>>, jit_aarch64_asm:mov(r17, -131072)), + + % mov immediate - very large value requiring multiple instructions + ?_assertEqual( + <<16#D29579A1:32/little, 16#F2B7C041:32/little, 16#F2DFD741:32/little, + 16#F2EFF941:32/little>>, + jit_aarch64_asm:mov(r1, 9208452466117618637) + ), + + % mov register + ?_assertEqual(<<16#AA0103E0:32/little>>, jit_aarch64_asm:mov(r0, r1)), + ?_assertEqual(<<16#AA0703E1:32/little>>, jit_aarch64_asm:mov(r1, r7)) + ]. + +orr_test_() -> + [ + % ORR Rd, XZR, Rm (MOV) + ?_assertEqual(<<16#AA0103E0:32/little>>, jit_aarch64_asm:orr(r0, xzr, r1)), + % ORR Rd, Rn, Rm + ?_assertEqual(<<16#AA010020:32/little>>, jit_aarch64_asm:orr(r0, r1, r1)), + ?_assertEqual(<<16#AA020041:32/little>>, jit_aarch64_asm:orr(r1, r2, r2)) + ]. + +str_test_() -> + [ + ?_assertEqual(<<16#F9000421:32/little>>, jit_aarch64_asm:str(r1, {8, r1})), + ?_assertEqual(<<16#F9003042:32/little>>, jit_aarch64_asm:str(r2, {96, r2})), + % str with xzr (zero register) - stores zero to memory + ?_assertEqual(<<16#F900001F:32/little>>, jit_aarch64_asm:str(xzr, {0, r0})), + ?_assertEqual(<<16#F900043F:32/little>>, jit_aarch64_asm:str(xzr, {8, r1})), + ?_assertEqual(<<16#F900085F:32/little>>, jit_aarch64_asm:str(xzr, {16, r2})) + ]. 
+ +str_x_test_() -> + [ + % Store-update (writeback) with SP + ?_assertEqual( + <<16#F81F0FE7:32/little>>, + jit_aarch64_asm:str_x(r7, {sp, -16}, '!') + ), + % Store-update (writeback) with SP, positive offset + ?_assertEqual( + <<16#F8010FE7:32/little>>, + jit_aarch64_asm:str_x(r7, {sp, 16}, '!') + ), + % Store-update (writeback) with SP, zero offset + ?_assertEqual( + <<16#F80007E7:32/little>>, + jit_aarch64_asm:str_x(r7, {sp}, 0) + ) + ]. + +cmp_test_() -> + [ + % cmp reg, reg + ?_assertEqual(<<16#EB01001F:32/little>>, jit_aarch64_asm:cmp(r0, r1)), + % cmp reg, imm + ?_assertEqual(<<16#F100001F:32/little>>, jit_aarch64_asm:cmp(r0, 0)), + ?_assertEqual(<<16#F103001F:32/little>>, jit_aarch64_asm:cmp(r0, 192)) + ]. + +cmp32_test_() -> + [ + % cmp32 reg, reg + ?_assertEqual(<<16#6B01001F:32/little>>, jit_aarch64_asm:cmp32(r0, r1)), + % cmp32 reg, imm + ?_assertEqual(<<16#7100001F:32/little>>, jit_aarch64_asm:cmp32(r0, 0)), + ?_assertEqual(<<16#7103001F:32/little>>, jit_aarch64_asm:cmp32(r0, 192)) + ]. + +and_reg_test_() -> + [ + % AND reg, reg, reg + ?_assertEqual(<<16#8A010020:32/little>>, jit_aarch64_asm:and_reg(r0, r1, r1)), + % AND reg, reg, imm + ?_assertEqual(<<16#927A0420:32/little>>, jit_aarch64_asm:and_reg(r0, r1, 192)) + ]. + +lsl_test_() -> + [ + ?_assertEqual(<<16#D3607C00:32/little>>, jit_aarch64_asm:lsl(r0, r0, 32)) + ]. + +lsr_test_() -> + [ + ?_assertEqual(<<16#D340FC00:32/little>>, jit_aarch64_asm:lsr(r0, r0, 0)), + ?_assertEqual(<<16#D340FC01:32/little>>, jit_aarch64_asm:lsr(r1, r0, 0)), + ?_assertEqual(<<16#D360FC00:32/little>>, jit_aarch64_asm:lsr(r0, r0, 32)) + ]. + +ret_test_() -> + [ + ?_assertEqual(<<16#D65F03C0:32/little>>, jit_aarch64_asm:ret()) + ]. + +tst_test_() -> + [ + % TST reg, reg + ?_assertEqual(<<16#EA01001F:32/little>>, jit_aarch64_asm:tst(r0, r1)), + % TST reg, imm (power of 2) + ?_assertEqual(<<16#F27C001F:32/little>>, jit_aarch64_asm:tst(r0, 16)) + ]. 
+ +tst32_test_() -> + [ + % TST32 reg, reg + ?_assertEqual(<<16#6A01001F:32/little>>, jit_aarch64_asm:tst32(r0, r1)), + % TST32 reg, imm (power of 2) + ?_assertEqual(<<16#721C001F:32/little>>, jit_aarch64_asm:tst32(r0, 16)) + ]. + +bcc_test_() -> + [ + ?_assertEqual(<<16#54000000:32/little>>, jit_aarch64_asm:bcc(eq, 0)), + ?_assertEqual(<<16#54000001:32/little>>, jit_aarch64_asm:bcc(ne, 0)), + ?_assertEqual(<<16#54000400:32/little>>, jit_aarch64_asm:bcc(eq, 128)) + ]. + +patch_b_offset_test_() -> + [ + ?_assertEqual( + <<16#14000001:32/little>>, jit_aarch64_asm:patch_b_offset(<<16#14000000:32/little>>, 4) + ), + ?_assertEqual( + <<16#14000010:32/little>>, jit_aarch64_asm:patch_b_offset(<<16#14000000:32/little>>, 64) + ) + ]. + +patch_bcc_offset_test_() -> + [ + ?_assertEqual( + <<16#54000400:32/little>>, + jit_aarch64_asm:patch_bcc_offset(<<16#54000000:32/little>>, 128) + ), + ?_assertEqual( + <<16#54000020:32/little>>, + jit_aarch64_asm:patch_bcc_offset(<<16#54000000:32/little>>, 4) + ) + ]. + +stp_x_test_() -> + [ + ?_assertEqual( + <<16#a8815113:32/little>>, + jit_aarch64_asm:stp_x(r19, r20, {r8}, 16) + ), + ?_assertEqual( + <<16#a88153f3:32/little>>, + jit_aarch64_asm:stp_x(r19, r20, {sp}, 16) + ), + % Store-update (writeback) variants + ?_assertEqual( + <<16#a9bf27e8:32/little>>, + jit_aarch64_asm:stp_x(r8, r9, {sp, -16}, '!') + ), + ?_assertEqual( + <<16#a98127e8:32/little>>, + jit_aarch64_asm:stp_x(r8, r9, {sp, 16}, '!') + ), + ?_assertEqual( + <<16#a98027e8:32/little>>, + jit_aarch64_asm:stp_x(r8, r9, {sp, 0}, '!') + ) + ]. + +ldp_x_test_() -> + [ + ?_assertEqual( + <<16#a8c15113:32/little>>, + jit_aarch64_asm:ldp_x(r19, r20, {r8}, 16) + ), + ?_assertEqual( + <<16#a8c153f3:32/little>>, + jit_aarch64_asm:ldp_x(r19, r20, {sp}, 16) + ) + ]. 
+ +ldr_x_test_() -> + [ + % Load-update (writeback) with SP, negative offset + ?_assertEqual( + <<16#F85F0FE7:32/little>>, + jit_aarch64_asm:ldr_x(r7, {sp, -16}, '!') + ), + % Load-update (writeback) with SP, positive offset + ?_assertEqual( + <<16#F8410FE7:32/little>>, + jit_aarch64_asm:ldr_x(r7, {sp, 16}, '!') + ), + % Load-update (writeback) with SP, zero offset + ?_assertEqual( + <<16#F84007E7:32/little>>, + jit_aarch64_asm:ldr_x(r7, {sp}, 0) + ) + ]. + +subs_test_() -> + [ + % SUBS with immediate + ?_assertEqual(<<16#F1000021:32/little>>, jit_aarch64_asm:subs(r1, r1, 0)), + ?_assertEqual(<<16#F1000421:32/little>>, jit_aarch64_asm:subs(r1, r1, 1)), + % SUBS with register + ?_assertEqual(<<16#eb000021:32/little>>, jit_aarch64_asm:subs(r1, r1, r0)), + ?_assertEqual(<<16#eb0a0021:32/little>>, jit_aarch64_asm:subs(r1, r1, r10)) + ]. + +adr_test_() -> + [ + %% ADR x0, #0 + ?_assertEqual(<<16#10000000:32/little>>, jit_aarch64_asm:adr(r0, 0)), + %% ADR x1, #4 + ?_assertEqual(<<16#10000021:32/little>>, jit_aarch64_asm:adr(r1, 4)), + %% ADR x2, #-4 + ?_assertEqual(<<16#10ffffe2:32/little>>, jit_aarch64_asm:adr(r2, -4)), + %% ADR x3, #1048572 (max positive) + ?_assertEqual(<<16#107fffe3:32/little>>, jit_aarch64_asm:adr(r3, 1048572)), + %% ADR x4, #-1048576 (max negative) + ?_assertEqual(<<16#10800004:32/little>>, jit_aarch64_asm:adr(r4, -1048576)), + %% ADR with offset not a multiple of 4 is valid + ?_assertEqual(<<16#70000000:32/little>>, jit_aarch64_asm:adr(r0, 3)) + ]. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl new file mode 100644 index 000000000..610023587 --- /dev/null +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -0,0 +1,367 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). + +-define(BACKEND, jit_aarch64). + +% disassembly obtained with: +% aarch64-elf-objdump -b binary -D dump.bin -M aarch64 + +call_primitive_0_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + "0: f9400050 ldr x16, [x2]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_1_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + "0: f9400450 ldr x16, [x2, #8]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_2_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400850 ldr x16, [x2, #16]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d2800541 mov x1, #0x2a // #42\n" + " 10: d2800562 mov x2, #0x2b // #43\n" + " 14: d2800583 mov x3, #0x2c // #44\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_extended_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]), + {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [ + ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}} + ]), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), + Dump = + << + "\n" + " 0: f9404850 ldr x16, [x2, #144]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d2800261 mov x1, #0x13 // #19\n" + " 10: d63f0200 blr x16\n" + " 14: aa0003e7 mov x7, x0\n" + " 18: a8c10be1 ldp x1, x2, [sp], #16\n" + " 1c: a8c103fe ldp x30, x0, [sp], #16\n" + " 20: f9404850 ldr x16, [x2, #144]\n" + " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 2c: f81f0fe7 str x7, [sp, #-16]!\n" + " 30: d2800281 mov x1, #0x14 // #20\n" + " 34: d63f0200 blr x16\n" + " 38: aa0003e8 mov x8, x0\n" + " 3c: f84107e7 ldr x7, [sp], #16\n" + " 40: a8c10be1 ldp x1, x2, [sp], #16\n" + " 44: a8c103fe ldp x30, x0, [sp], #16\n" + " 48: f9404850 ldr x16, [x2, #144]\n" + " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 54: a9bf1fe8 stp x8, x7, [sp, #-16]!\n" + " 58: d2800261 mov x1, #0x13 // #19\n" + " 5c: d63f0200 blr x16\n" + " 60: aa0003e9 mov x9, x0\n" + " 64: a8c11fe8 ldp x8, x7, [sp], #16\n" + " 68: a8c10be1 ldp x1, x2, [sp], #16\n" + " 6c: a8c103fe ldp x30, x0, [sp], #16\n" + " 70: f9403450 ldr x16, [x2, #104]\n" + " 74: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 78: a9bf0be1 stp x1, x2, 
[sp, #-16]!\n" + " 7c: f81f0fe9 str x9, [sp, #-16]!\n" + " 80: f94000e1 ldr x1, [x7]\n" + " 84: f9400102 ldr x2, [x8]\n" + " 88: d63f0200 blr x16\n" + " 8c: aa0003e7 mov x7, x0\n" + " 90: f84107e9 ldr x9, [sp], #16\n" + " 94: a8c10be1 ldp x1, x2, [sp], #16\n" + " 98: a8c103fe ldp x30, x0, [sp], #16\n" + " 9c: f9000127 str x7, [x9]\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_only_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, -1]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9400827 ldr x7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: f9000827 str x7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9401047 ldr x7, [x2, #32]\n" + " 24: 92800002 mov x2, #0xffffffffffffffff // #-1\n" + " 28: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_last_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, 10]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9400827 ldr x7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: f9000827 str x7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9401047 ldr x7, [x2, #32]\n" + " 24: d2800042 mov x2, #0x2 // #2\n" + " 28: d2800043 mov x3, #0x2 // #2\n" + " 2c: d2800144 mov x4, #0xa // #10\n" + " 30: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_last_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400047 ldr x7, [x2]\n" + " 4: d2800542 mov x2, #0x2a // #42\n" + " 8: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_cp_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f94000e7 ldr x7, [x7]\n" + " 8: f9005c07 str x7, [x0, #184]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +increment_sp_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:increment_sp(State0, 7), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: 9100e0e7 add x7, x7, #0x38\n" + " 8: f9001407 str x7, [x0, #40]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_only_or_schedule_next_and_label_relocation_test() -> + %% TODO: Implement AArch64 version + ok. 
+ +call_bif_with_large_literal_integer_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), + {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 9208452466117618637]), + {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [ + ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg} + ]), + State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), +% ok = file:write_file("dump.bin", Stream), + Dump = + << + " 0: f9402050 ldr x16, [x2, #64]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e0 mov x0, x1\n" + " 10: d2800041 mov x1, #0x2 // #2\n" + " 14: d63f0200 blr x16\n" + " 18: aa0003e7 mov x7, x0\n" + " 1c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 20: a8c103fe ldp x30, x0, [sp], #16\n" + " 24: f9403c50 ldr x16, [x2, #120]\n" + " 28: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 2c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 30: f81f0fe7 str x7, [sp, #-16]!\n" + " 34: d29579a1 mov x1, #0xabcd // #43981\n" + " 38: f2b7c041 movk x1, #0xbe02, lsl #16\n" + " 3c: f2dfd741 movk x1, #0xfeba, lsl #32\n" + " 40: f2eff941 movk x1, #0x7fca, lsl #48\n" + " 44: d63f0200 blr x16\n" + " 48: aa0003e8 mov x8, x0\n" + " 4c: f84107e7 ldr x7, [sp], #16\n" + " 50: a8c10be1 ldp x1, x2, [sp], #16\n" + " 54: a8c103fe ldp x30, x0, [sp], #16\n" + " 58: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 5c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 60: d2800001 mov x1, #0x0 // #0\n" + " 64: d2800022 mov x2, #0x1 // #1\n" + " 68: f9401803 ldr x3, [x0, #48]\n" + " 6c: aa0803e4 mov x4, x8\n" + " 70: d63f00e0 blr x7\n" + 
" 74: aa0003e7 mov x7, x0\n" + " 78: a8c10be1 ldp x1, x2, [sp], #16\n" + " 7c: a8c103fe ldp x30, x0, [sp], #16\n" + " 80: ea0700ff tst x7, x7\n" + " 84: 54000081 b.ne 0x94 // b.any\n" + " 88: f9401847 ldr x7, [x2, #48]\n" + " 8c: d2801182 mov x2, #0x8c // #140\n" + " 90: d61f00e0 br x7\n" + " 94: f9001807 str x7, [x0, #48]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +get_list_test() -> + %% TODO: Implement AArch64 version + ok. + +is_integer_test() -> + %% TODO: Implement AArch64 version + ok. + +is_boolean_test() -> + %% TODO: Implement AArch64 version + ok. + +call_ext_test() -> + %% TODO: Implement AArch64 version + ok. + +call_fun_test() -> + %% TODO: Implement AArch64 version + ok. + +move_to_vm_register_test_() -> + %% TODO: Implement AArch64 version + []. + +move_array_element_test_() -> + %% TODO: Implement AArch64 version + []. + +get_array_element_test_() -> + %% TODO: Implement AArch64 version + []. + +move_to_array_element_test_() -> + %% TODO: Implement AArch64 version + []. + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). 
+ +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + %% Parse 8 hex digits (AArch64 32-bit instruction) + Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, instr, Acc); +dump_to_bin0(<<>>, _, Acc) -> + list_to_binary(lists:reverse(Acc)). diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index 6f3f387e3..cc783796d 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -27,6 +27,7 @@ start() -> etest:test([ jit_tests, + jit_aarch64_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). 
From e28af9db95b13223571d43c6a400a3374a046c72 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 07:48:16 +0200 Subject: [PATCH 07/46] AArch64: get rid of patch_* helpers, fix warnings Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 77 +++++++++++------------- libs/jit/src/jit_aarch64_asm.erl | 35 +++-------- tests/libs/jit/jit_aarch64_asm_tests.erl | 22 ------- 3 files changed, 43 insertions(+), 91 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index fe3e57a94..aed4a8d73 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -536,8 +536,8 @@ if_block( {Replacements, State1} = lists:foldl( fun(Cond, {AccReplacements, AccState}) -> Offset = StreamModule:offset(AccState#state.stream), - {NewAccState, ReplaceDelta} = if_block_cond(AccState, Cond), - {[Offset + ReplaceDelta | AccReplacements], NewAccState} + {NewAccState, CC, ReplaceDelta} = if_block_cond(AccState, Cond), + {[{Offset + ReplaceDelta, CC} | AccReplacements], NewAccState} end, {[], State0}, CondList @@ -546,11 +546,10 @@ if_block( Stream2 = State2#state.stream, OffsetAfter = StreamModule:offset(Stream2), Stream3 = lists:foldl( - fun(ReplacementOffset, AccStream) -> + fun({ReplacementOffset, CC}, AccStream) -> BranchOffset = OffsetAfter - ReplacementOffset, - StreamModule:map(Stream2, ReplacementOffset, 4, fun(PrevValue) -> - jit_aarch64_asm:patch_bcc_offset(PrevValue, BranchOffset) - end) + NewBranchInstr = jit_aarch64_asm:bcc(CC, BranchOffset), + StreamModule:replace(AccStream, ReplacementOffset, NewBranchInstr) end, Stream2, Replacements @@ -562,15 +561,14 @@ if_block( BlockFn ) -> Offset = StreamModule:offset(Stream0), - {State1, BranchInstrOffset} = if_block_cond(State0, Cond), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), State2 = BlockFn(State1), Stream2 = State2#state.stream, OffsetAfter = StreamModule:offset(Stream2), %% Patch the conditional branch instruction to jump to the end of the 
block BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), - Stream3 = StreamModule:map(Stream2, Offset + BranchInstrOffset, 4, fun(PrevValue) -> - jit_aarch64_asm:patch_bcc_offset(PrevValue, BranchOffset) - end), + NewBranchInstr = jit_aarch64_asm:bcc(CC, BranchOffset), + Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). %%----------------------------------------------------------------------------- @@ -592,22 +590,20 @@ if_else_block( BlockFalseFn ) -> Offset = StreamModule:offset(Stream0), - {State1, BranchInstrOffset} = if_block_cond(State0, Cond), - OffsetAfterCond = StreamModule:offset(State1#state.stream), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), State2 = BlockTrueFn(State1), Stream2 = State2#state.stream, + %% Emit unconditional branch to skip the else block (will be replaced) ElseJumpOffset = StreamModule:offset(Stream2), - %% Emit unconditional branch to skip the else block - - % Placeholder offset, will be patched - I = jit_aarch64_asm:b(0), - Stream3 = StreamModule:append(Stream2, I), - OffsetAfterJump = StreamModule:offset(Stream3), + ElseJumpInstr = jit_aarch64_asm:b(0), + Stream3 = StreamModule:append(Stream2, ElseJumpInstr), + %% Else block starts here. 
+ OffsetAfter = StreamModule:offset(Stream3), %% Patch the conditional branch to jump to the else block - ElseBranchOffset = OffsetAfterJump - (Offset + BranchInstrOffset), - Stream4 = StreamModule:map(Stream3, Offset + BranchInstrOffset, 4, fun(PrevValue) -> - jit_aarch64_asm:patch_bcc_offset(PrevValue, ElseBranchOffset) - end), + ElseBranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + NewBranchInstr = jit_aarch64_asm:bcc(CC, ElseBranchOffset), + Stream4 = StreamModule:replace(Stream3, Offset + BranchInstrOffset, NewBranchInstr), + %% Build the else block StateElse = State2#state{ stream = Stream4, used_regs = State1#state.used_regs, @@ -618,13 +614,12 @@ if_else_block( Stream5 = State3#state.stream, OffsetFinal = StreamModule:offset(Stream5), %% Patch the unconditional branch to jump to the end - FinalJumpOffset = OffsetFinal - OffsetAfterJump, - Stream6 = StreamModule:map(Stream5, ElseJumpOffset, 4, fun(PrevValue) -> - jit_aarch64_asm:patch_b_offset(PrevValue, FinalJumpOffset) - end), + FinalJumpOffset = OffsetFinal - ElseJumpOffset, + NewElseJumpInstr = jit_aarch64_asm:b(FinalJumpOffset), + Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). --spec if_block_cond(state(), condition()) -> {state(), non_neg_integer()}. +-spec if_block_cond(state(), condition()) -> {state(), jit_aarch64_asm:cc(), non_neg_integer()}. 
if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> I1 = jit_aarch64_asm:tst(Reg, Reg), % pl = positive or zero (>=0) @@ -635,7 +630,7 @@ if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, { >>, Stream1 = StreamModule:append(Stream0, Code), State1 = State0#state{stream = Stream1}, - {State1, byte_size(I1)}; + {State1, pl, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegA, '<', RegB} @@ -649,7 +644,7 @@ if_block_cond( >>, Stream1 = StreamModule:append(Stream0, Code), State1 = State0#state{stream = Stream1}, - {State1, byte_size(I1)}; + {State1, ge, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} ) -> @@ -668,7 +663,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', 0} ) -> @@ -686,7 +681,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '!=', Val} @@ -709,7 +704,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, eq, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '!=', Val} @@ -732,7 +727,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, 
byte_size(I1)}; + {State2, eq, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', Val} @@ -755,7 +750,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', Val} @@ -778,7 +773,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(uint8_t)', RegOrTuple, '==', false} @@ -798,7 +793,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(uint8_t)', RegOrTuple, '!=', false} @@ -818,7 +813,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, eq, byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -842,7 +837,7 @@ if_block_cond( >>, Stream1 = StreamModule:append(Stream0, Code), State1 = State0#state{stream = Stream1}, - {State1, byte_size(I1) + byte_size(I2) + byte_size(I3)}; + {State1, eq, byte_size(I1) + byte_size(I2) + byte_size(I3)}; if_block_cond( #state{ stream_module = StreamModule, @@ -863,7 +858,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1) + byte_size(I2)}; + {State2, eq, 
byte_size(I1) + byte_size(I2)}; if_block_cond( #state{ stream_module = StreamModule, @@ -886,7 +881,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}; + {State2, eq, byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -909,7 +904,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, byte_size(I1)}. + {State2, eq, byte_size(I1)}. -spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index dee323452..cb9ebadd4 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -36,8 +36,6 @@ movk/3, movz/3, orr/3, - patch_b_offset/2, - patch_bcc_offset/2, ret/0, str/2, str_x/3, @@ -49,6 +47,10 @@ adr/2 ]). +-export_type([ + cc/0 +]). + -type aarch64_gpr_register() :: r0 | r1 @@ -68,6 +70,8 @@ | r15 | xzr. +-type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al | nv. + %% Emit an ADD instruction (AArch64 encoding) %% ADD Rd, Rn, #imm - adds immediate value to register -spec add(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary(). @@ -594,7 +598,7 @@ reg_to_num(sp) -> 31; reg_to_num(xzr) -> 31. %% Emit a conditional branch instruction --spec bcc(atom(), integer()) -> binary(). +-spec bcc(cc(), integer()) -> binary(). bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> CondNum = case Cond of @@ -770,31 +774,6 @@ tst32(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> end end. 
-%% Patch an unconditional branch (B) instruction with a new offset -%% Takes the previous 32-bit instruction and a new offset, returns the patched instruction --spec patch_b_offset(binary(), integer()) -> binary(). -patch_b_offset(<>, NewOffset) when is_integer(NewOffset) -> - Opcode = PrevInstr band 16#FC000000, - case Opcode of - 16#14000000 -> - <<(16#14000000 bor (NewOffset div 4)):32/little>>; - _ -> - <> - end. - -%% Patch a conditional branch (B.cond) instruction with a new offset -%% Takes the previous 32-bit instruction and a new offset, returns the patched instruction --spec patch_bcc_offset(binary(), integer()) -> binary(). -patch_bcc_offset(<>, NewOffset) when is_integer(NewOffset) -> - Opcode = PrevInstr band 16#FF000000, - Cond = PrevInstr band 16#0000000F, - case Opcode of - 16#54000000 -> - <<(16#54000000 bor ((NewOffset div 4) bsl 5) bor Cond):32/little>>; - _ -> - <> - end. - %% Emit a subtract and set flags (SUBS) instruction (AArch64 encoding) %% SUBS Rd, Rn, Rm/imm - subtracts and sets condition flags -spec subs(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) -> diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 31237f186..7d178fb86 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -195,28 +195,6 @@ bcc_test_() -> ?_assertEqual(<<16#54000400:32/little>>, jit_aarch64_asm:bcc(eq, 128)) ]. -patch_b_offset_test_() -> - [ - ?_assertEqual( - <<16#14000001:32/little>>, jit_aarch64_asm:patch_b_offset(<<16#14000000:32/little>>, 4) - ), - ?_assertEqual( - <<16#14000010:32/little>>, jit_aarch64_asm:patch_b_offset(<<16#14000000:32/little>>, 64) - ) - ]. 
- -patch_bcc_offset_test_() -> - [ - ?_assertEqual( - <<16#54000400:32/little>>, - jit_aarch64_asm:patch_bcc_offset(<<16#54000000:32/little>>, 128) - ), - ?_assertEqual( - <<16#54000020:32/little>>, - jit_aarch64_asm:patch_bcc_offset(<<16#54000000:32/little>>, 4) - ) - ]. - stp_x_test_() -> [ ?_assertEqual( From 92cfe6626a2db6a6249bd8295da771ce129a7d20 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 08:02:30 +0200 Subject: [PATCH 08/46] AArch64: call_only_or_schedule_next_and_label_relocation_test Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 55 +++++++++++++++------------- libs/jit/src/jit_aarch64_asm.erl | 2 +- tests/libs/jit/jit_aarch64_tests.erl | 34 +++++++++++++++-- 3 files changed, 62 insertions(+), 29 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index aed4a8d73..03d8d2b0d 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -352,7 +352,7 @@ jump_table0( ) -> Offset = StreamModule:offset(Stream0), BranchInstr = jit_aarch64_asm:b(0), - Reloc = {N, Offset, 32}, + Reloc = {N, Offset, b}, Stream1 = StreamModule:append(Stream0, BranchInstr), jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). 
@@ -370,14 +370,19 @@ update_branches( #state{ stream_module = StreamModule, stream = Stream0, - branches = [{Label, Offset, Size} | BranchesT] + branches = [{Label, Offset, Type} | BranchesT] } = State, Labels ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), - Rel = ((LabelOffset - Offset) div 4), - Patched = <<(16#14000000 bor (Rel band 16#03FFFFFF)):32>>, - Stream1 = StreamModule:map(Stream0, Offset, Size div 8, fun(_) -> Patched end), + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); + {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); + b -> jit_aarch64_asm:b(Rel) + end, + Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), update_branches(State#state{stream = Stream1, branches = BranchesT}, Labels). %%----------------------------------------------------------------------------- @@ -507,14 +512,9 @@ jump_to_label( #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches} = State, Label ) -> Offset = StreamModule:offset(Stream0), - %% Use unconditional branch instruction B - % Placeholder offset, will be patched - I1 = jit_aarch64_asm:b(-4), - % Offset is at the beginning of the instruction - RelocOffset = 0, - % AArch64 B instruction uses 26-bit offset - Reloc = {Label, Offset + RelocOffset, 26}, + I1 = jit_aarch64_asm:b(0), + Reloc = {Label, Offset, b}, Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1, branches = [Reloc | AccBranches]}. 
@@ -1859,9 +1859,9 @@ set_continuation_to_label( Label ) -> Offset = StreamModule:offset(Stream0), - {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), - Reloc = {Label, Offset + RewriteLEAOffset, 32}, - I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1, branches = [Reloc | Branches]}. @@ -1981,19 +1981,24 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - branches = Branches + branches = Branches, + available_regs = [Temp | _] } = State0, Label ) -> - Offset = StreamModule:offset(Stream0), - I1 = jit_x86_64_asm:decl(?JITSTATE_REDUCTIONCOUNT), - {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(-4), - I2 = jit_x86_64_asm:jz(byte_size(I3)), - Sz = byte_size(I1) + byte_size(I2), - Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]}, + % Load reduction count + I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT), + % Decrement reduction count + I2 = jit_aarch64_asm:subs(Temp, Temp, 1), + % Store back the decremented value + I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT), + Stream1 = StreamModule:append(Stream0, <>), + BNEOffset = StreamModule:offset(Stream1), + % Branch to label if reduction count is not zero + I4 = jit_aarch64_asm:bcc(ne, 0), + Reloc1 = {Label, BNEOffset, {bcc, ne}}, + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]}, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). 
diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index cb9ebadd4..9a982620c 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -70,7 +70,7 @@ | r15 | xzr. --type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al | nv. +-type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al | nv. %% Emit an ADD instruction (AArch64 encoding) %% ADD Rd, Rn, #imm - adds immediate value to register diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 610023587..2d3eb4cce 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -225,8 +225,37 @@ increment_sp_test() -> ?assertEqual(dump_to_bin(Dump), Stream). call_only_or_schedule_next_and_label_relocation_test() -> - %% TODO: Implement AArch64 version - ok. + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + Offset1 = ?BACKEND:offset(State1), + State2 = ?BACKEND:call_only_or_schedule_next(State1, 2), + Offset2 = ?BACKEND:offset(State2), + State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + % OP_INT_CALL_END + Offset0 = ?BACKEND:offset(State3), + State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), + State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), + Stream = ?BACKEND:stream(State5), +% ok = file:write_file("dump.bin", Stream), + Dump = + << + " 0: 1400000d b 0x34\n" + " 4: 14000002 b 0xc\n" + " 8: 14000009 b 0x2c\n" + " c: f9400827 ldr x7, [x1, #16]\n" + " 10: f10004e7 subs x7, x7, #0x1\n" + " 14: f9000827 str x7, [x1, #16]\n" + " 18: 540000a1 b.ne 0x2c // b.any\n" + " 1c: 10000087 adr x7, 0x2c\n" + " 20: f9000427 str x7, [x1, #8]\n" + " 24: f9400847 ldr x7, [x2, #16]\n" + " 28: d61f00e0 br x7\n" + " 2c: f9400047 ldr x7, [x2]\n" + " 30: d61f00e0 br x7\n" + " 34: f9400447 ldr x7, 
[x2, #8]\n" + " 38: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -242,7 +271,6 @@ call_bif_with_large_literal_integer_test() -> State6 = ?BACKEND:free_native_registers(State5, [ResultReg]), ?BACKEND:assert_all_native_free(State6), Stream = ?BACKEND:stream(State6), -% ok = file:write_file("dump.bin", Stream), Dump = << " 0: f9402050 ldr x16, [x2, #64]\n" From 5597cf76731a3c3daf47ad96d414486d6e699be0 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 20:11:24 +0200 Subject: [PATCH 09/46] AArch64: rename and_/3 and fix bitmask with a new test Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 14 ++++----- libs/jit/src/jit_aarch64_asm.erl | 37 ++++++++++++------------ tests/libs/jit/jit_aarch64_asm_tests.erl | 10 +++++-- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 03d8d2b0d..9a4a41653 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -825,7 +825,7 @@ if_block_cond( % Move Reg to Temp I1 = jit_aarch64_asm:orr(Temp, xzr, Reg), % AND with mask - I2 = jit_aarch64_asm:and_reg(Temp, Temp, Mask), + I2 = jit_aarch64_asm:and_(Temp, Temp, Mask), % Compare with value I3 = jit_aarch64_asm:cmp(Temp, Val), I4 = jit_aarch64_asm:bcc(eq, 0), @@ -846,7 +846,7 @@ if_block_cond( {{free, Reg} = RegTuple, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> % AND with mask - I1 = jit_aarch64_asm:and_reg(Reg, Reg, Mask), + I1 = jit_aarch64_asm:and_(Reg, Reg, Mask), % Compare with value I2 = jit_aarch64_asm:cmp(Reg, Val), I3 = jit_aarch64_asm:bcc(eq, 0), @@ -1445,9 +1445,9 @@ move_array_element( Index, {y_reg, Y} ) when is_integer(Index) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, Temp1), - I2 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp2), - I3 = jit_x86_64_asm:movq(Temp2, {Y * 8, Temp1}), + I1 = 
jit_aarch64_asm:ldr(Temp1, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Temp2, {Index * 8, Reg}), + I3 = jit_aarch64_asm:str(Temp2, {Y * 8, Temp1}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1722,7 +1722,7 @@ move_to_native_register( ) when X < ?MAX_REG -> - I1 = jit_x86_64_asm:movq(?X_REG(X), Reg), + I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register( @@ -1901,7 +1901,7 @@ get_module_index( }. and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm:andq(Val, Reg), + I1 = jit_aarch64_asm:and_(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 9a982620c..d85bd897f 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -27,7 +27,7 @@ brk/1, cmp/2, cmp32/2, - and_reg/3, + and_/3, ldr/2, ldr_x/3, lsl/3, @@ -371,13 +371,12 @@ try_encode_single_pattern(Pattern, Size) -> Imms = case Size of 64 -> OnesCount - 1; - 32 -> (1 bsl 5) bor (OnesCount - 1); - 16 -> (1 bsl 4) bor (OnesCount - 1); - 8 -> (1 bsl 3) bor (OnesCount - 1); - 4 -> (1 bsl 2) bor (OnesCount - 1); - 2 -> (1 bsl 1) bor (OnesCount - 1) + 32 -> OnesCount - 1; + 16 -> 2#100000 bor (OnesCount - 1); + 8 -> 2#110000 bor (OnesCount - 1); + 4 -> 2#111000 bor (OnesCount - 1); + 2 -> 2#111100 bor (OnesCount - 1) end, - %% immr is the rotation amount (negate of start position) Immr = (-StartPos) band (Size - 1), @@ -666,9 +665,9 @@ cmp32(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> <<(16#7100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>. 
%% Emit an AND instruction (bitwise AND) --spec and_reg(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> +-spec and_(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). -and_reg(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> +and_(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), RmNum = reg_to_num(Rm), @@ -677,17 +676,19 @@ and_reg(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> << (16#8A000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little >>; -and_reg(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> +and_(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), - case Imm of - %% special case for #192 - 192 -> - <<(16#927A0420 bor (RnNum bsl 5) bor RdNum):32/little>>; - _ -> - << - (16#92000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little - >> + case encode_bitmask_immediate(Imm) of + {ok, N, Immr, Imms} -> + % AND immediate encoding: sf=1(64b) 00(op) 100100 N immr imms Rn Rd + Opcode = 16#92000000, + Instr = + Opcode bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5) bor + RdNum, + <>; + error -> + error({unencodable_immediate, Imm}) end. %% Emit a logical shift left instruction diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 7d178fb86..55419ee4e 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -147,12 +147,16 @@ cmp32_test_() -> ?_assertEqual(<<16#7103001F:32/little>>, jit_aarch64_asm:cmp32(r0, 192)) ]. 
-and_reg_test_() -> +and_test_() -> [ % AND reg, reg, reg - ?_assertEqual(<<16#8A010020:32/little>>, jit_aarch64_asm:and_reg(r0, r1, r1)), + ?_assertEqual(<<16#8A010020:32/little>>, jit_aarch64_asm:and_(r0, r1, r1)), % AND reg, reg, imm - ?_assertEqual(<<16#927A0420:32/little>>, jit_aarch64_asm:and_reg(r0, r1, 192)) + ?_assertEqual(<<16#927A0420:32/little>>, jit_aarch64_asm:and_(r0, r1, 192)), + ?_assertEqual(<<16#927ff8e7:32/little>>, jit_aarch64_asm:and_(r7, r7, -2)), + ?_assertEqual(<<16#9200cc41:32/little>>, jit_aarch64_asm:and_(r1, r2, 16#f0f0f0f0f0f0f0f)), + ?_assertEqual(<<16#92603c62:32/little>>, jit_aarch64_asm:and_(r2, r3, 16#ffff00000000)), + ?_assertEqual(<<16#92785c83:32/little>>, jit_aarch64_asm:and_(r3, r4, 16#ffffff00)) ]. lsl_test_() -> From 20073b9b9eb8af251af45a81be4f0b8e8168c52a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 20:12:04 +0200 Subject: [PATCH 10/46] AArch64: implement get_list_test/0 Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_tests.erl | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 2d3eb4cce..cd5ccea2c 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -236,7 +236,6 @@ call_only_or_schedule_next_and_label_relocation_test() -> State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), Stream = ?BACKEND:stream(State5), -% ok = file:write_file("dump.bin", Stream), Dump = << " 0: 1400000d b 0x34\n" @@ -315,8 +314,26 @@ call_bif_with_large_literal_integer_test() -> ?assertEqual(dump_to_bin(Dump), Stream). get_list_test() -> - %% TODO: Implement AArch64 version - ok. 
+ State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, Reg, -4), + State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), + State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + Stream = ?BACKEND:stream(State5), + ok = file:write_file("dump.bin", Stream), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 8: f9401408 ldr x8, [x0, #40]\n" + " c: f94004e9 ldr x9, [x7, #8]\n" + " 10: f9000509 str x9, [x8, #8]\n" + " 14: f9401408 ldr x8, [x0, #40]\n" + " 18: f94000e9 ldr x9, [x7]\n" + " 1c: f9000109 str x9, [x8]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). is_integer_test() -> %% TODO: Implement AArch64 version From 880ca814173c735c37488216b95cafeb9a3d2979 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 20:27:06 +0200 Subject: [PATCH 11/46] AArch64: implement is_integer_test/0 Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 2 +- tests/libs/jit/jit_aarch64_tests.erl | 51 ++++++++++++++++++++++++++-- tests/libs/jit/tests.erl | 1 + 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 9a4a41653..4683e4ab8 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1467,7 +1467,7 @@ move_array_element( move_array_element( #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest ) when is_atom(Dest) andalso is_integer(Index) -> - I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Dest), + I1 = jit_aarch64_asm:ldr(Dest, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_array_element( diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl 
index cd5ccea2c..076dd34c9 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -322,7 +322,6 @@ get_list_test() -> State5 = ?BACKEND:free_native_registers(State4, [Reg]), ?BACKEND:assert_all_native_free(State5), Stream = ?BACKEND:stream(State5), - ok = file:write_file("dump.bin", Stream), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" @@ -336,8 +335,54 @@ get_list_test() -> ?assertEqual(dump_to_bin(Dump), Stream). is_integer_test() -> - %% TODO: Implement AArch64 version - ok. + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + MSt1 = ?BACKEND:if_block( + MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ), + MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), + ?BACKEND:if_block( + MSt3, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + Labels = [{Label, Offset + 16#100}], + State4 = ?BACKEND:update_branches(State3, Labels), + Stream = ?BACKEND:stream(State4), + ok = file:write_file("dump.bin", Stream), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: aa0703e8 mov x8, x7\n" + " 8: 92400d08 and x8, x8, #0xf\n" + " c: f1003d1f cmp x8, #0xf\n" + " 10: 54000180 b.eq 0x40 // b.none\n" + " 14: aa0703e8 mov x8, x7\n" + " 18: 92400508 and x8, x8, #0x3\n" + " 1c: f100091f cmp x8, #0x2\n" + " 20: 54000040 b.eq 0x28 // b.none\n" + " 24: 14000047 b 0x140\n" + " 
28: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 2c: f94000e7 ldr x7, [x7]\n" + " 30: 924014e7 and x7, x7, #0x3f\n" + " 34: f10020ff cmp x7, #0x8\n" + " 38: 54000040 b.eq 0x40 // b.none\n" + " 3c: 14000041 b 0x140" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> %% TODO: Implement AArch64 version diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index cc783796d..a435ab17e 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -27,6 +27,7 @@ start() -> etest:test([ jit_tests, + jit_aarch64_tests, jit_aarch64_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests From 8302ea5163ec9cfc163c052eac026dacaa7e01ff Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 20:30:47 +0200 Subject: [PATCH 12/46] AArch64: implement is_boolean_test/0 Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_tests.erl | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 076dd34c9..2444d877f 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -363,7 +363,6 @@ is_integer_test() -> Labels = [{Label, Offset + 16#100}], State4 = ?BACKEND:update_branches(State3, Labels), Stream = ?BACKEND:stream(State4), - ok = file:write_file("dump.bin", Stream), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" " 4: aa0703e8 mov x8, x7\n" @@ -385,8 +384,30 @@ is_integer_test() -> ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> - %% TODO: Implement AArch64 version - ok. 
+ State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + Offset = ?BACKEND:offset(State3), + Labels = [{Label, Offset + 16#100}], + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:update_branches(State3, Labels), + Stream = ?BACKEND:stream(State4), +% ok = file:write_file("dump.bin", Stream), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f1012cff cmp x7, #0x4b\n" + " 8: 54000080 b.eq 0x18 // b.none\n" + " c: f1002cff cmp x7, #0xb\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 14000041 b 0x118" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). call_ext_test() -> %% TODO: Implement AArch64 version From 2e126fe1482565f7744a7046cf7076e4e224077b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 22:38:00 +0200 Subject: [PATCH 13/46] AArch64: change order to match assembly and use unimplemented for unimpl cases Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 241 ++++++++++++----------- libs/jit/src/jit_aarch64_asm.erl | 66 ++++--- tests/libs/jit/jit_aarch64_asm_tests.erl | 82 ++++---- tests/libs/jit/jit_aarch64_tests.erl | 31 ++- 4 files changed, 232 insertions(+), 188 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 4683e4ab8..c7fc25c58 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -170,17 +170,17 @@ -define(CTX_REG, r0). -define(JITSTATE_REG, r1). -define(NATIVE_INTERFACE_REG, r2). --define(Y_REGS, {16#28, ?CTX_REG}). --define(X_REG(N), {16#30 + (N * 8), ?CTX_REG}). --define(CP, {16#B8, ?CTX_REG}). --define(FP_REGS, {16#C0, ?CTX_REG}). --define(BS, {16#C8, ?CTX_REG}). 
--define(BS_OFFSET, {16#D0, ?CTX_REG}). --define(JITSTATE_MODULE, {0, ?JITSTATE_REG}). --define(JITSTATE_CONTINUATION, {16#8, ?JITSTATE_REG}). --define(JITSTATE_REDUCTIONCOUNT, {16#10, ?JITSTATE_REG}). --define(PRIMITIVE(N), {N * 8, ?NATIVE_INTERFACE_REG}). --define(MODULE_INDEX(ModuleReg), {0, ModuleReg}). +-define(Y_REGS, {?CTX_REG, 16#28}). +-define(X_REG(N), {?CTX_REG, 16#30 + (N * 8)}). +-define(CP, {?CTX_REG, 16#B8}). +-define(FP_REGS, {?CTX_REG, 16#C0}). +-define(BS, {?CTX_REG, 16#C8}). +-define(BS_OFFSET, {?CTX_REG, 16#D0}). +-define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). +-define(JITSTATE_CONTINUATION, {?JITSTATE_REG, 16#8}). +-define(JITSTATE_REDUCTIONCOUNT, {?JITSTATE_REG, 16#10}). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 8}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). % aarch64 ABI specific -define(LR_REG, r30). @@ -407,9 +407,9 @@ call_primitive( PrepCall = case Primitive of 0 -> - jit_aarch64_asm:ldr(?IP0_REG, {0, ?NATIVE_INTERFACE_REG}); + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, 0}); N -> - jit_aarch64_asm:ldr(?IP0_REG, {N * 8, ?NATIVE_INTERFACE_REG}) + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, N * 8}) end, Stream1 = StreamModule:append(Stream0, PrepCall), StateCall = State#state{stream = Stream1}, @@ -444,9 +444,9 @@ call_primitive_last( PrepCall = case Primitive of 0 -> - jit_aarch64_asm:ldr(Temp, {0, ?NATIVE_INTERFACE_REG}); + jit_aarch64_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, 0}); N -> - jit_aarch64_asm:ldr(Temp, {N * 8, ?NATIVE_INTERFACE_REG}) + jit_aarch64_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, N * 8}) end, Stream1 = StreamModule:append(Stream0, PrepCall), State1 = set_args( @@ -1014,9 +1014,9 @@ call_func_ptr( PrepCall = case Primitive of 0 -> - jit_aarch64_asm:ldr(?IP0_REG, {0, ?NATIVE_INTERFACE_REG}); + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, 0}); N -> - jit_aarch64_asm:ldr(?IP0_REG, {N * 8, ?NATIVE_INTERFACE_REG}) + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, N * 8}) end, 
{?IP0_REG, StreamModule:append(Stream2, PrepCall)} end, @@ -1055,22 +1055,22 @@ call_func_ptr( }. push_registers([RegA, RegB | Tail], StreamModule, Stream0) -> - Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:stp_x(RegA, RegB, {sp, -16}, '!')), + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:stp(RegA, RegB, {sp, -16}, '!')), push_registers(Tail, StreamModule, Stream1); push_registers([], _StreamModule, Stream0) -> {false, Stream0}; push_registers([RegA], StreamModule, Stream0) -> - Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:str_x(RegA, {sp, -16}, '!')), + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:str(RegA, {sp, -16}, '!')), {true, Stream1}. pop_registers(true, [Reg | Tail], StreamModule, Stream0) -> % Odd number of registers, pop the last one first - Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldr_x(Reg, {sp}, 16)), + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldr(Reg, {sp}, 16)), pop_registers(false, Tail, StreamModule, Stream1); pop_registers(false, [], _StreamModule, Stream0) -> Stream0; pop_registers(false, [RegB, RegA | Tail], StreamModule, Stream0) -> - Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldp_x(RegA, RegB, {sp}, 16)), + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldp(RegA, RegB, {sp}, 16)), pop_registers(false, Tail, StreamModule, Stream1). -spec set_args(state(), [arg()]) -> state(). 
@@ -1196,7 +1196,7 @@ set_args1({x_reg, extra}, Reg) -> set_args1({x_reg, X}, Reg) -> jit_aarch64_asm:ldr(Reg, ?X_REG(X)); set_args1({ptr, Source}, Reg) -> - jit_aarch64_asm:ldr(Reg, {0, Source}); + jit_aarch64_asm:ldr(Reg, {Source, 0}); set_args1({y_reg, X}, Reg) -> [ jit_aarch64_asm:mov(Reg, ?Y_REGS), @@ -1226,7 +1226,7 @@ move_to_vm_register( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_vm_register(#state{stream_module = StreamModule, stream = Stream0} = State, 0, {ptr, Reg}) -> - I1 = jit_aarch64_asm:str(xzr, {0, Reg}), + I1 = jit_aarch64_asm:str(xzr, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_vm_register( @@ -1248,7 +1248,7 @@ move_to_vm_register( ?IS_SINT32_T(N) -> I1 = jit_aarch64_asm:mov(Temp, N), - I2 = jit_aarch64_asm:str(Temp, {0, Reg}), + I2 = jit_aarch64_asm:str(Temp, {Reg, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1270,7 +1270,7 @@ move_to_vm_register( is_integer(N) -> I1 = jit_aarch64_asm:mov(Temp, N), - I2 = jit_aarch64_asm:str(Temp, {0, Reg}), + I2 = jit_aarch64_asm:str(Temp, {Reg, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1279,7 +1279,7 @@ move_to_vm_register( {y_reg, Y} ) -> I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:str(xzr, {Y * 8, Temp}), + I2 = jit_aarch64_asm:str(xzr, {Temp, Y * 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1290,7 +1290,7 @@ move_to_vm_register( ) when ?IS_SINT32_T(N) -> I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), I2 = jit_aarch64_asm:mov(Temp2, N), - I3 = jit_aarch64_asm:str(Temp2, {Y * 8, Temp1}), + I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1303,7 +1303,7 @@ move_to_vm_register( -> I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), I2 = 
jit_aarch64_asm:mov(Temp2, N), - I3 = jit_aarch64_asm:str(Temp2, {X * 8, Temp1}), + I3 = jit_aarch64_asm:str(Temp2, {Temp1, X * 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1321,7 +1321,7 @@ move_to_vm_register( {ptr, Reg} ) when X < ?MAX_REG -> I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)), - I2 = jit_aarch64_asm:str(Temp, {0, Reg}), + I2 = jit_aarch64_asm:str(Temp, {Reg, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1332,7 +1332,7 @@ move_to_vm_register( ) when X < ?MAX_REG -> I1 = jit_aarch64_asm:ldr(Temp1, ?X_REG(X)), I2 = jit_aarch64_asm:ldr(Temp2, ?Y_REGS), - I3 = jit_aarch64_asm:str(Temp1, {Y * 8, Temp2}), + I3 = jit_aarch64_asm:str(Temp1, {Temp2, Y * 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1341,7 +1341,7 @@ move_to_vm_register( {x_reg, X} ) when X < ?MAX_REG -> I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp, {Y * 8, Temp}), + I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * 8}), I3 = jit_aarch64_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; @@ -1351,8 +1351,8 @@ move_to_vm_register( {ptr, Reg} ) -> I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp, {Y * 8, Temp}), - I3 = jit_aarch64_asm:str(Temp, {0, Reg}), + I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * 8}), + I3 = jit_aarch64_asm:str(Temp, {Reg, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_vm_register( @@ -1362,8 +1362,8 @@ move_to_vm_register( {y_reg, YD} ) -> I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp2, {YS * 8, Temp1}), - I3 = jit_aarch64_asm:str(Temp2, {YD * 8, Temp1}), + I2 = jit_aarch64_asm:ldr(Temp2, {Temp1, YS * 8}), + I3 = jit_aarch64_asm:str(Temp2, {Temp1, YD * 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; 
move_to_vm_register( @@ -1375,7 +1375,7 @@ move_to_vm_register( move_to_vm_register( #state{stream_module = StreamModule, stream = Stream0} = State, Reg, {ptr, Dest} ) when is_atom(Reg) -> - I1 = jit_aarch64_asm:str(Reg, {0, Dest}), + I1 = jit_aarch64_asm:str(Reg, {Dest, 0}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_vm_register( @@ -1384,7 +1384,7 @@ move_to_vm_register( {y_reg, Y} ) when is_atom(Reg) -> I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:str(Reg, {Y * 8, Temp}), + I2 = jit_aarch64_asm:str(Reg, {Temp, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1395,8 +1395,8 @@ move_to_vm_register( {y_reg, Y} ) when ?IS_GPR(Reg) -> I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp2, {0, Reg}), - I3 = jit_aarch64_asm:str(Temp2, {Y * 8, Temp1}), + I2 = jit_aarch64_asm:ldr(Temp2, {Reg, 0}), + I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1406,7 +1406,7 @@ move_to_vm_register( {fp_reg, F} ) when is_atom(Reg) -> I1 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), - I2 = jit_aarch64_asm:str(Reg, {F * 8, Temp}), + I2 = jit_aarch64_asm:str(Reg, {Temp, F * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. 
@@ -1424,8 +1424,8 @@ move_array_element( Index, {x_reg, X} ) when X < ?MAX_REG andalso is_integer(Index) -> - I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp), - I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)), + I1 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, Temp), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( @@ -1434,8 +1434,8 @@ move_array_element( Index, {ptr, Dest} ) when is_integer(Index) -> - I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp), - I2 = jit_x86_64_asm:movq(Temp, {0, Dest}), + I1 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, Temp), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, {0, Dest}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( @@ -1446,8 +1446,8 @@ move_array_element( {y_reg, Y} ) when is_integer(Index) -> I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp2, {Index * 8, Reg}), - I3 = jit_aarch64_asm:str(Temp2, {Y * 8, Temp1}), + I2 = jit_aarch64_asm:ldr(Temp2, {Reg, Index * 8}), + I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1458,16 +1458,16 @@ move_array_element( Index, {y_reg, Y} ) when is_integer(Index) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm:movq({Index * 8, Reg}, Reg), - I3 = jit_x86_64_asm:movq(Reg, {Y * 8, Temp}), + I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), + I2 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, Reg), + I3 = jit_x86_64_asm_unimplemented:movq(Reg, {Y * 8, Temp}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; move_array_element( #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest ) when is_atom(Dest) andalso is_integer(Index) -> - I1 = jit_aarch64_asm:ldr(Dest, {Index * 8, Reg}), + I1 = jit_aarch64_asm:ldr(Dest, {Reg, 
Index * 8}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_array_element( @@ -1482,10 +1482,10 @@ move_array_element( {free, IndexReg}, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(IndexReg) -> - I1 = jit_x86_64_asm:shlq(3, IndexReg), - I2 = jit_x86_64_asm:addq(Reg, IndexReg), - I3 = jit_x86_64_asm:movq({0, IndexReg}, IndexReg), - I4 = jit_x86_64_asm:movq(IndexReg, ?X_REG(X)), + I1 = jit_x86_64_asm_unimplemented:shlq(3, IndexReg), + I2 = jit_x86_64_asm_unimplemented:addq(Reg, IndexReg), + I3 = jit_x86_64_asm_unimplemented:movq({0, IndexReg}, IndexReg), + I4 = jit_x86_64_asm_unimplemented:movq(IndexReg, ?X_REG(X)), {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg ), @@ -1508,11 +1508,11 @@ move_array_element( {free, IndexReg}, {y_reg, Y} ) when ?IS_GPR(IndexReg) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm:shlq(3, IndexReg), - I3 = jit_x86_64_asm:addq(Reg, IndexReg), - I4 = jit_x86_64_asm:movq({0, IndexReg}, IndexReg), - I5 = jit_x86_64_asm:movq(IndexReg, {Y * 8, Temp}), + I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), + I2 = jit_x86_64_asm_unimplemented:shlq(3, IndexReg), + I3 = jit_x86_64_asm_unimplemented:addq(Reg, IndexReg), + I4 = jit_x86_64_asm_unimplemented:movq({0, IndexReg}, IndexReg), + I5 = jit_x86_64_asm_unimplemented:movq(IndexReg, {Y * 8, Temp}), {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg ), @@ -1539,7 +1539,7 @@ get_array_element( Reg, Index ) -> - I1 = jit_x86_64_asm:movq({Index * 8, Reg}, ElemReg), + I1 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, ElemReg), Stream1 = StreamModule:append(Stream0, <>), { State#state{ @@ -1558,8 +1558,8 @@ move_to_array_element( Reg, Index ) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso is_integer(Index) -> - I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), - I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), + I1 = 
jit_x86_64_asm_unimplemented:movq(?X_REG(X), Temp), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( @@ -1568,8 +1568,8 @@ move_to_array_element( Reg, IndexReg ) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> - I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), - I2 = jit_x86_64_asm:movq(Temp, {0, Reg, IndexReg, 8}), + I1 = jit_x86_64_asm_unimplemented:movq(?X_REG(X), Temp), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, {0, Reg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( @@ -1578,8 +1578,8 @@ move_to_array_element( Reg, Index ) -> - I1 = jit_x86_64_asm:movq({0, Source}, Temp), - I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), + I1 = jit_x86_64_asm_unimplemented:movq({0, Source}, Temp), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( @@ -1589,9 +1589,9 @@ move_to_array_element( Reg, Index ) when ?IS_GPR(Reg) andalso is_integer(Index) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), - I3 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), + I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), + I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Temp}, Temp), + I3 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1602,22 +1602,22 @@ move_to_array_element( Reg, IndexReg ) when ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), - I3 = jit_x86_64_asm:movq(Temp, {0, Reg, IndexReg, 8}), + I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), + I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Temp}, Temp), + I3 = 
jit_x86_64_asm_unimplemented:movq(Temp, {0, Reg, IndexReg, 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0} = State, Source, Reg, Index ) when ?IS_GPR(Source) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> - I1 = jit_x86_64_asm:movq(Source, {Index * 8, Reg}), + I1 = jit_x86_64_asm_unimplemented:movq(Source, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0} = State, Source, Reg, Index ) when ?IS_SINT32_T(Source) andalso is_integer(Index) -> - I1 = jit_x86_64_asm:movq(Source, {Index * 8, Reg}), + I1 = jit_x86_64_asm_unimplemented:movq(Source, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_array_element( @@ -1626,8 +1626,8 @@ move_to_array_element( Reg, Index ) when is_integer(Source) andalso is_integer(Index) -> - I1 = jit_x86_64_asm:movabsq(Source, Temp), - I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), + I1 = jit_x86_64_asm_unimplemented:movabsq(Source, Temp), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}. 
@@ -1638,8 +1638,8 @@ move_to_array_element( IndexReg, Offset ) when X < ?MAX_REG andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), - I2 = jit_x86_64_asm:movq(Temp, {Offset, BaseReg, IndexReg, 8}), + I1 = jit_x86_64_asm_unimplemented:movq(?X_REG(X), Temp), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Offset, BaseReg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( @@ -1649,9 +1649,9 @@ move_to_array_element( IndexReg, Offset ) when ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), - I3 = jit_x86_64_asm:movq(Temp, {Offset, BaseReg, IndexReg, 8}), + I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), + I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Temp}, Temp), + I3 = jit_x86_64_asm_unimplemented:movq(Temp, {Offset, BaseReg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( @@ -1663,7 +1663,7 @@ move_to_array_element( ) when ?IS_GPR(Source) andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_x86_64_asm:movq(Source, {Offset, BaseReg, IndexReg, 8}), + I1 = jit_x86_64_asm_unimplemented:movq(Source, {Offset, BaseReg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_array_element( @@ -1676,7 +1676,7 @@ move_to_array_element( ?IS_SINT32_T(Source) andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_x86_64_asm:movq(Source, {Offset, BaseReg, IndexReg, 8}), + I1 = jit_x86_64_asm_unimplemented:movq(Source, {Offset, BaseReg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, I1 / binary), State#state{stream = Stream1}; move_to_array_element( @@ -1694,7 +1694,7 @@ move_to_native_register(State, Reg) when is_atom(Reg) -> 
move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} ) when is_atom(Reg) -> - I1 = jit_x86_64_asm:movq({0, Reg}, Reg), + I1 = jit_x86_64_asm_unimplemented:movq({0, Reg}, Reg), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1}, Reg}; move_to_native_register( @@ -1708,7 +1708,7 @@ move_to_native_register( ) when is_integer(Imm) -> - I1 = jit_x86_64_asm:movq(Imm, Reg), + I1 = jit_x86_64_asm_unimplemented:movq(Imm, Reg), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register( @@ -1734,8 +1734,8 @@ move_to_native_register( } = State, {y_reg, Y} ) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, Reg), - I2 = jit_x86_64_asm:movq({Y * 8, Reg}, Reg), + I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Reg), + I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Reg}, Reg), Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; @@ -1749,8 +1749,8 @@ move_to_native_register( } = State, {fp_reg, F} ) -> - I1 = jit_x86_64_asm:movq(?FP_REGS, Temp), - I2 = jit_x86_64_asm:movsd({F * 8, Temp}, FPReg), + I1 = jit_x86_64_asm_unimplemented:movq(?FP_REGS, Temp), + I2 = jit_x86_64_asm_unimplemented:movsd({F * 8, Temp}, FPReg), Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, available_fpregs = AvailFT, used_regs = [FPReg | Used]}, FPReg}. 
@@ -1759,13 +1759,13 @@ move_to_native_register( move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst ) when is_atom(RegSrc) orelse is_integer(RegSrc) -> - I = jit_x86_64_asm:movq(RegSrc, RegDst), + I = jit_x86_64_asm_unimplemented:movq(RegSrc, RegDst), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst ) when is_atom(Reg) -> - I1 = jit_x86_64_asm:movq({0, Reg}, RegDst), + I1 = jit_x86_64_asm_unimplemented:movq({0, Reg}, RegDst), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_native_register( @@ -1773,14 +1773,14 @@ move_to_native_register( ) when X < ?MAX_REG -> - I1 = jit_x86_64_asm:movq(?X_REG(X), RegDst), + I1 = jit_x86_64_asm_unimplemented:movq(?X_REG(X), RegDst), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst ) -> - I1 = jit_x86_64_asm:movq(?Y_REGS, RegDst), - I2 = jit_x86_64_asm:movq({Y * 8, RegDst}, RegDst), + I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, RegDst), + I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, RegDst}, RegDst), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1793,8 +1793,8 @@ move_to_native_register( {fp_reg, F}, RegDst ) -> - I1 = jit_x86_64_asm:movq(?FP_REGS, Temp), - I2 = jit_x86_64_asm:movsd({F * 8, Temp}, RegDst), + I1 = jit_x86_64_asm_unimplemented:movq(?FP_REGS, Temp), + I2 = jit_x86_64_asm_unimplemented:movsd({F * 8, Temp}, RegDst), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. 
@@ -1809,7 +1809,7 @@ copy_to_native_register( } = State, Reg ) when is_atom(Reg) -> - I1 = jit_x86_64_asm:movq(Reg, SaveReg), + I1 = jit_x86_64_asm_unimplemented:movq(Reg, SaveReg), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; copy_to_native_register( @@ -1821,7 +1821,7 @@ copy_to_native_register( } = State, {ptr, Reg} ) when is_atom(Reg) -> - I1 = jit_x86_64_asm:movq({0, Reg}, SaveReg), + I1 = jit_x86_64_asm_unimplemented:movq({0, Reg}, SaveReg), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; copy_to_native_register(State, Reg) -> @@ -1832,7 +1832,7 @@ move_to_cp( {y_reg, Y} ) -> I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Reg, {Y * 8, Reg}), + I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * 8}), I3 = jit_aarch64_asm:str(Reg, ?CP), Code = <>, Stream1 = StreamModule:append(Stream0, Code), @@ -1876,9 +1876,9 @@ set_continuation_to_offset( ) -> OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), - {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), + {RewriteLEAOffset, I1} = jit_x86_64_asm_unimplemented:leaq_rel32({-4, rip}, Temp), Reloc = {OffsetRef, Offset + RewriteLEAOffset, 32}, - I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + I2 = jit_x86_64_asm_unimplemented:movq(Temp, ?JITSTATE_CONTINUATION), Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. 
@@ -1891,8 +1891,8 @@ get_module_index( used_regs = UsedRegs0 } = State ) -> - I1 = jit_x86_64_asm:movq(?JITSTATE_MODULE, Reg), - I2 = jit_x86_64_asm:movl(?MODULE_INDEX(Reg), Reg), + I1 = jit_aarch64_asm:ldr(Reg, ?JITSTATE_MODULE), + I2 = jit_aarch64_asm:ldr_w(Reg, ?MODULE_INDEX(Reg)), Code = <>, Stream1 = StreamModule:append(Stream0, Code), { @@ -1906,17 +1906,17 @@ and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) - State#state{stream = Stream1}. or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm:orq(Val, Reg), + I1 = jit_x86_64_asm_unimplemented:orq(Val, Reg), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm:addq(Val, Reg), + I1 = jit_x86_64_asm_unimplemented:addq(Val, Reg), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm:subq(Val, Reg), + I1 = jit_x86_64_asm_unimplemented:subq(Val, Reg), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. @@ -1935,7 +1935,7 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm:imulq(Val, Reg), + I1 = jit_x86_64_asm_unimplemented:imulq(Val, Reg), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. @@ -2015,16 +2015,16 @@ set_cp(State0) -> ), Offset = StreamModule:offset(Stream0), % build cp with module_index << 24 - I1 = jit_x86_64_asm:shlq(24, Reg), - % next part of cp is instruction offset, after the call. 
- {RewriteOffset, I2} = jit_x86_64_asm:orq_rel32(0, Reg), - AddrOffset = Offset + byte_size(I1) + RewriteOffset, - I3 = jit_x86_64_asm:movq(Reg, ?CP), - Code = <>, + I1 = jit_aarch64_asm:lsl(Reg, Reg, 24), + I2 = jit_aarch64_asm:mov(?IP0_REG, 0), + MOVOffset = Offset + byte_size(I1), + I3 = jit_aarch64_asm:orr(Reg, Reg, ?IP0_REG), + I4 = jit_aarch64_asm:str(Reg, ?CP), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State2 = State1#state{stream = Stream1}, State3 = free_native_register(State2, Reg), - {State3, AddrOffset}. + {State3, MOVOffset}. -spec rewrite_cp_offset(state(), non_neg_integer()) -> state(). rewrite_cp_offset( @@ -2032,13 +2032,14 @@ rewrite_cp_offset( RewriteOffset ) -> NewOffset = StreamModule:offset(Stream0) - CodeOffset, - % Encode ReturnAddrOffset << 2 - Stream1 = StreamModule:replace(Stream0, RewriteOffset, <<(NewOffset bsl 2):32/little>>), + NewMoveInstr = jit_aarch64_asm:mov(?IP0_REG, NewOffset bsl 2), + ?ASSERT(byte_size(NewMoveInstr) =:= 4), + Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), State0#state{stream = Stream1}. set_bs(#state{stream_module = StreamModule, stream = Stream0} = State0, TermReg) -> - I1 = jit_x86_64_asm:movq(TermReg, ?BS), - I2 = jit_x86_64_asm:movq(0, ?BS_OFFSET), + I1 = jit_x86_64_asm_unimplemented:movq(TermReg, ?BS), + I2 = jit_x86_64_asm_unimplemented:movq(0, ?BS_OFFSET), Stream1 = StreamModule:append(Stream0, <>), State0#state{stream = Stream1}. 
@@ -2060,8 +2061,8 @@ return_labels_and_lines( SortedLabels, SortedLines ) -> - I2 = jit_x86_64_asm:retq(), - {_RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({byte_size(I2), rip}, rax), + I2 = jit_x86_64_asm_unimplemented:retq(), + {_RewriteLEAOffset, I1} = jit_x86_64_asm_unimplemented:leaq_rel32({byte_size(I2), rip}, rax), LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, LinesTable = <<<> || {Line, Offset} <- SortedLines>>, Stream1 = StreamModule:append( diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index d85bd897f..33e1e4252 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -29,7 +29,8 @@ cmp32/2, and_/3, ldr/2, - ldr_x/3, + ldr_w/2, + ldr/3, lsl/3, lsr/3, mov/2, @@ -38,11 +39,11 @@ orr/3, ret/0, str/2, - str_x/3, + str/3, tst/2, tst32/2, - stp_x/4, - ldp_x/4, + stp/4, + ldp/4, subs/3, adr/2 ]). @@ -117,9 +118,9 @@ br(Reg) when is_atom(Reg) -> <<(16#D61F0000 bor (RegNum bsl 5)):32/little>>. %% Emit a load register (LDR) instruction for 64-bit load from memory (AArch64 encoding) -%% Dst is destination register atom, Src is {Offset, BaseReg} tuple --spec ldr(aarch64_gpr_register(), {integer(), aarch64_gpr_register()}) -> binary(). -ldr(Dst, {Offset, BaseReg}) when +%% Dst is destination register atom, Src is {BaseReg, Offset} tuple +-spec ldr(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary(). +ldr(Dst, {BaseReg, Offset}) when is_atom(Dst), is_atom(BaseReg), is_integer(Offset), @@ -135,6 +136,25 @@ ldr(Dst, {Offset, BaseReg}) when (16#F9400000 bor ((Offset div 8) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little >>. +%% Emit a load register (LDR) instruction for 32-bit load from memory (AArch64 encoding) +%% Dst is destination register atom, Src is {BaseReg, Offset} tuple +-spec ldr_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary(). 
+ldr_w(Dst, {BaseReg, Offset}) when + is_atom(Dst), + is_atom(BaseReg), + is_integer(Offset), + Offset >= 0, + Offset =< 32760, + (Offset rem 8) =:= 0 +-> + DstNum = reg_to_num(Dst), + BaseRegNum = reg_to_num(BaseReg), + %% AArch64 LDR (immediate) encoding for 64-bit: 11111001010iiiiiiiiiiibbbbbttttt + %% 0xf9400000 | (Offset div 8) << 10 | BaseReg << 5 | Dst + << + (16#B9400000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little + >>. + %% Emit a move immediate (MOV) instruction for various immediate sizes (AArch64 encoding) %% Dst is destination register atom, Imm is immediate value %% Returns a binary that may contain multiple instructions for complex immediates @@ -438,8 +458,8 @@ orr(DstReg, Rn, Rm) when is_atom(DstReg), is_atom(Rn), is_atom(Rm) -> >>. %% Emit a store register (STR) instruction for 64-bit store to memory --spec str(aarch64_gpr_register(), {integer(), aarch64_gpr_register()}) -> binary(). -str(SrcReg, {Offset, BaseReg}) when +-spec str(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary(). +str(SrcReg, {BaseReg, Offset}) when is_atom(SrcReg), is_atom(BaseReg), is_integer(Offset), @@ -456,16 +476,16 @@ str(SrcReg, {Offset, BaseReg}) when >>. %% Emit a store register (STR) instruction for 64-bit store to memory, with store-update (writeback) --spec str_x +-spec str (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary(); (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary(). 
-str_x(Reg, {Base, Imm}, '!') when +str(Reg, {Base, Imm}, '!') when is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 -> RegNum = reg_to_num(Reg), BaseNum = reg_to_num(Base), <<(16#F8000C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>; -str_x(Reg, {Base}, Imm) when +str(Reg, {Base}, Imm) when is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 -> RegNum = reg_to_num(Reg), @@ -473,16 +493,16 @@ str_x(Reg, {Base}, Imm) when <<(16#F8000400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>. %% Emit a load register (LDR) instruction for 64-bit store to memory, with store-update (writeback) --spec ldr_x +-spec ldr (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary(); (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary(). -ldr_x(Reg, {Base, Imm}, '!') when +ldr(Reg, {Base, Imm}, '!') when is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 -> RegNum = reg_to_num(Reg), BaseNum = reg_to_num(Base), <<(16#F8400C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>; -ldr_x(Reg, {Base}, Imm) when +ldr(Reg, {Base}, Imm) when is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0 -> RegNum = reg_to_num(Reg), @@ -490,15 +510,15 @@ ldr_x(Reg, {Base}, Imm) when <<(16#F8400400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>. %% Emit a store pair (STP) instruction for 64-bit registers -%% stp_x(Rn, Rm, {Base}, Imm) -> binary() -%% stp_x(Rn, Rm, {Base, Imm}, '!') -> binary() (store-update) --spec stp_x( +%% stp(Rn, Rm, {Base}, Imm) -> binary() +%% stp(Rn, Rm, {Base, Imm}, '!') -> binary() (store-update) +-spec stp( aarch64_gpr_register(), aarch64_gpr_register(), {aarch64_gpr_register()} | {aarch64_gpr_register(), integer()}, integer() | '!' ) -> binary(). 
-stp_x(Rn, Rm, {Base}, Imm) when +stp(Rn, Rm, {Base}, Imm) when is_atom(Rn), is_atom(Rm), is_atom(Base), @@ -515,7 +535,7 @@ stp_x(Rn, Rm, {Base}, Imm) when << (16#A8800000 bor ((Imm div 8) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor RnNum):32/little >>; -stp_x(Rn, Rm, {Base, Imm}, '!') when +stp(Rn, Rm, {Base, Imm}, '!') when is_atom(Rn), is_atom(Rm), is_atom(Base), @@ -533,10 +553,10 @@ stp_x(Rn, Rm, {Base, Imm}, '!') when >>. %% Emit a load pair (LDP) instruction for 64-bit registers -%% ldp_x(Rn, Rm, {Base}, Imm) -> binary() --spec ldp_x(aarch64_gpr_register(), aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> +%% ldp(Rn, Rm, {Base}, Imm) -> binary() +-spec ldp(aarch64_gpr_register(), aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary(). -ldp_x(Rn, Rm, {Base}, Imm) when +ldp(Rn, Rm, {Base}, Imm) when is_atom(Rn), is_atom(Rm), is_atom(Base), diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 55419ee4e..81a437392 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -60,8 +60,29 @@ br_test_() -> ldr_test_() -> [ - ?_assertEqual(<<16#F9400421:32/little>>, jit_aarch64_asm:ldr(r1, {8, r1})), - ?_assertEqual(<<16#F9403042:32/little>>, jit_aarch64_asm:ldr(r2, {96, r2})) + ?_assertEqual(<<16#F9400421:32/little>>, jit_aarch64_asm:ldr(r1, {r1, 8})), + ?_assertEqual(<<16#F9403042:32/little>>, jit_aarch64_asm:ldr(r2, {r2, 96})), + % Load-update (writeback) with SP, negative offset + ?_assertEqual( + <<16#F85F0FE7:32/little>>, + jit_aarch64_asm:ldr(r7, {sp, -16}, '!') + ), + % Load-update (writeback) with SP, positive offset + ?_assertEqual( + <<16#F8410FE7:32/little>>, + jit_aarch64_asm:ldr(r7, {sp, 16}, '!') + ), + % Load-update (writeback) with SP, zero offset + ?_assertEqual( + <<16#F84007E7:32/little>>, + jit_aarch64_asm:ldr(r7, {sp}, 0) + ) + ]. 
+ +ldr_w_test_() -> + [ + ?_assertEqual(<<16#b9400821:32/little>>, jit_aarch64_asm:ldr_w(r1, {r1, 8})), + ?_assertEqual(<<16#b9406042:32/little>>, jit_aarch64_asm:ldr_w(r2, {r2, 96})) ]. mov_test_() -> @@ -102,30 +123,26 @@ orr_test_() -> str_test_() -> [ - ?_assertEqual(<<16#F9000421:32/little>>, jit_aarch64_asm:str(r1, {8, r1})), - ?_assertEqual(<<16#F9003042:32/little>>, jit_aarch64_asm:str(r2, {96, r2})), + ?_assertEqual(<<16#F9000421:32/little>>, jit_aarch64_asm:str(r1, {r1, 8})), + ?_assertEqual(<<16#F9003042:32/little>>, jit_aarch64_asm:str(r2, {r2, 96})), % str with xzr (zero register) - stores zero to memory - ?_assertEqual(<<16#F900001F:32/little>>, jit_aarch64_asm:str(xzr, {0, r0})), - ?_assertEqual(<<16#F900043F:32/little>>, jit_aarch64_asm:str(xzr, {8, r1})), - ?_assertEqual(<<16#F900085F:32/little>>, jit_aarch64_asm:str(xzr, {16, r2})) - ]. - -str_x_test_() -> - [ + ?_assertEqual(<<16#F900001F:32/little>>, jit_aarch64_asm:str(xzr, {r0, 0})), + ?_assertEqual(<<16#F900043F:32/little>>, jit_aarch64_asm:str(xzr, {r1, 8})), + ?_assertEqual(<<16#F900085F:32/little>>, jit_aarch64_asm:str(xzr, {r2, 16})), % Store-update (writeback) with SP ?_assertEqual( <<16#F81F0FE7:32/little>>, - jit_aarch64_asm:str_x(r7, {sp, -16}, '!') + jit_aarch64_asm:str(r7, {sp, -16}, '!') ), % Store-update (writeback) with SP, positive offset ?_assertEqual( <<16#F8010FE7:32/little>>, - jit_aarch64_asm:str_x(r7, {sp, 16}, '!') + jit_aarch64_asm:str(r7, {sp, 16}, '!') ), % Store-update (writeback) with SP, zero offset ?_assertEqual( <<16#F80007E7:32/little>>, - jit_aarch64_asm:str_x(r7, {sp}, 0) + jit_aarch64_asm:str(r7, {sp}, 0) ) ]. @@ -199,59 +216,40 @@ bcc_test_() -> ?_assertEqual(<<16#54000400:32/little>>, jit_aarch64_asm:bcc(eq, 128)) ]. 
-stp_x_test_() -> +stp_test_() -> [ ?_assertEqual( <<16#a8815113:32/little>>, - jit_aarch64_asm:stp_x(r19, r20, {r8}, 16) + jit_aarch64_asm:stp(r19, r20, {r8}, 16) ), ?_assertEqual( <<16#a88153f3:32/little>>, - jit_aarch64_asm:stp_x(r19, r20, {sp}, 16) + jit_aarch64_asm:stp(r19, r20, {sp}, 16) ), % Store-update (writeback) variants ?_assertEqual( <<16#a9bf27e8:32/little>>, - jit_aarch64_asm:stp_x(r8, r9, {sp, -16}, '!') + jit_aarch64_asm:stp(r8, r9, {sp, -16}, '!') ), ?_assertEqual( <<16#a98127e8:32/little>>, - jit_aarch64_asm:stp_x(r8, r9, {sp, 16}, '!') + jit_aarch64_asm:stp(r8, r9, {sp, 16}, '!') ), ?_assertEqual( <<16#a98027e8:32/little>>, - jit_aarch64_asm:stp_x(r8, r9, {sp, 0}, '!') + jit_aarch64_asm:stp(r8, r9, {sp, 0}, '!') ) ]. -ldp_x_test_() -> +ldp_test_() -> [ ?_assertEqual( <<16#a8c15113:32/little>>, - jit_aarch64_asm:ldp_x(r19, r20, {r8}, 16) + jit_aarch64_asm:ldp(r19, r20, {r8}, 16) ), ?_assertEqual( <<16#a8c153f3:32/little>>, - jit_aarch64_asm:ldp_x(r19, r20, {sp}, 16) - ) - ]. - -ldr_x_test_() -> - [ - % Load-update (writeback) with SP, negative offset - ?_assertEqual( - <<16#F85F0FE7:32/little>>, - jit_aarch64_asm:ldr_x(r7, {sp, -16}, '!') - ), - % Load-update (writeback) with SP, positive offset - ?_assertEqual( - <<16#F8410FE7:32/little>>, - jit_aarch64_asm:ldr_x(r7, {sp, 16}, '!') - ), - % Load-update (writeback) with SP, zero offset - ?_assertEqual( - <<16#F84007E7:32/little>>, - jit_aarch64_asm:ldr_x(r7, {sp}, 0) + jit_aarch64_asm:ldp(r19, r20, {sp}, 16) ) ]. 
diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 2444d877f..c4dff1cf0 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -398,7 +398,6 @@ is_boolean_test() -> ?BACKEND:assert_all_native_free(State3), State4 = ?BACKEND:update_branches(State3, Labels), Stream = ?BACKEND:stream(State4), -% ok = file:write_file("dump.bin", Stream), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" " 4: f1012cff cmp x7, #0x4b\n" @@ -410,8 +409,34 @@ is_boolean_test() -> ?assertEqual(dump_to_bin(Dump), Stream). call_ext_test() -> - %% TODO: Implement AArch64 version - ok. + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), + ?BACKEND:assert_all_native_free(State2), + Stream = ?BACKEND:stream(State2), + ok = file:write_file("dump.bin", Stream), + Dump = << + " 0: f9400827 ldr x7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: f9000827 str x7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9400027 ldr x7, [x1]\n" + " 24: b94000e7 ldr w7, [x7]\n" + " 28: d3689ce7 lsl x7, x7, #24\n" + " 2c: d2802610 mov x16, #0x130 // #304\n" + " 30: aa1000e7 orr x7, x7, x16\n" + " 34: f9005c07 str x7, [x0, #184]\n" + " 38: f9401047 ldr x7, [x2, #32]\n" + " 3c: d2800042 mov x2, #0x2 // #2\n" + " 40: d28000a3 mov x3, #0x5 // #5\n" + " 44: 92800004 mov x4, #0xffffffffffffffff // #-1\n" + " 48: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
call_fun_test() -> %% TODO: Implement AArch64 version From f0a8c7ee19977c0f59843fa7b6eb76b353371807 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 22:50:58 +0200 Subject: [PATCH 14/46] AArch64: call_fun_test/0 and optimization of a condition Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 15 ++-- tests/libs/jit/jit_aarch64_tests.erl | 101 ++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 27 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index c7fc25c58..22944ae84 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -822,22 +822,19 @@ if_block_cond( } = State0, {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> - % Move Reg to Temp - I1 = jit_aarch64_asm:orr(Temp, xzr, Reg), % AND with mask - I2 = jit_aarch64_asm:and_(Temp, Temp, Mask), + I1 = jit_aarch64_asm:and_(Temp, Reg, Mask), % Compare with value - I3 = jit_aarch64_asm:cmp(Temp, Val), - I4 = jit_aarch64_asm:bcc(eq, 0), + I2 = jit_aarch64_asm:cmp(Temp, Val), + I3 = jit_aarch64_asm:bcc(eq, 0), Code = << I1/binary, I2/binary, - I3/binary, - I4/binary + I3/binary >>, Stream1 = StreamModule:append(Stream0, Code), State1 = State0#state{stream = Stream1}, - {State1, eq, byte_size(I1) + byte_size(I2) + byte_size(I3)}; + {State1, eq, byte_size(I1) + byte_size(I2)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1809,7 +1806,7 @@ copy_to_native_register( } = State, Reg ) when is_atom(Reg) -> - I1 = jit_x86_64_asm_unimplemented:movq(Reg, SaveReg), + I1 = jit_aarch64_asm:mov(SaveReg, Reg), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; copy_to_native_register( diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index c4dff1cf0..6e30f20e2 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -365,21 +365,19 @@ is_integer_test() -> 
Stream = ?BACKEND:stream(State4), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: aa0703e8 mov x8, x7\n" - " 8: 92400d08 and x8, x8, #0xf\n" - " c: f1003d1f cmp x8, #0xf\n" - " 10: 54000180 b.eq 0x40 // b.none\n" - " 14: aa0703e8 mov x8, x7\n" - " 18: 92400508 and x8, x8, #0x3\n" - " 1c: f100091f cmp x8, #0x2\n" - " 20: 54000040 b.eq 0x28 // b.none\n" - " 24: 14000047 b 0x140\n" - " 28: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 2c: f94000e7 ldr x7, [x7]\n" - " 30: 924014e7 and x7, x7, #0x3f\n" - " 34: f10020ff cmp x7, #0x8\n" - " 38: 54000040 b.eq 0x40 // b.none\n" - " 3c: 14000041 b 0x140" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 54000160 b.eq 0x38 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 14000047 b 0x138\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e7 and x7, x7, #0x3f\n" + " 2c: f10020ff cmp x7, #0x8\n" + " 30: 54000040 b.eq 0x38 // b.none\n" + " 34: 14000041 b 0x138" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -414,7 +412,6 @@ call_ext_test() -> State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), ?BACKEND:assert_all_native_free(State2), Stream = ?BACKEND:stream(State2), - ok = file:write_file("dump.bin", Stream), Dump = << " 0: f9400827 ldr x7, [x1, #16]\n" " 4: f10004e7 subs x7, x7, #0x1\n" @@ -439,10 +436,78 @@ call_ext_test() -> ?assertEqual(dump_to_bin(Dump), Stream). call_fun_test() -> - %% TODO: Implement AArch64 version - ok. 
+ State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + FuncReg = {x_reg, 0}, + ArgsCount = 0, + {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg), + {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg), + State4 = ?BACKEND:if_block( + State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), + State7 = ?BACKEND:if_block( + State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State8 = ?BACKEND:free_native_registers(State7, [RegCopy]), + State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [ + ctx, jit_state, Reg, ArgsCount + ]), + ?BACKEND:assert_all_native_free(State9), + Stream = ?BACKEND:stream(State9), + Dump = << + " 0: f9400827 ldr x7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: f9000827 str x7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9401807 ldr x7, [x0, #48]\n" + " 24: aa0703e8 mov x8, x7\n" + " 28: 92400509 and x9, x8, #0x3\n" + " 2c: f100093f cmp x9, #0x2\n" + " 30: 540000c0 b.eq 0x48 // b.none\n" + " 34: f9404c47 ldr x7, [x2, #152]\n" + " 38: d2800702 mov x2, #0x38 // #56\n" + " 3c: d2804163 mov x3, #0x20b // #523\n" + " 40: aa0803e4 mov x4, x8\n" + " 44: d61f00e0 br x7\n" + " 48: 927ef508 and x8, x8, #0xfffffffffffffffc\n" + " 4c: f9400108 ldr x8, [x8]\n" + " 50: 92401509 and x9, x8, #0x3f\n" + " 54: 
f100513f cmp x9, #0x14\n" + " 58: 540000c0 b.eq 0x70 // b.none\n" + " 5c: f9404c47 ldr x7, [x2, #152]\n" + " 60: d2800c02 mov x2, #0x60 // #96\n" + " 64: d2804163 mov x3, #0x20b // #523\n" + " 68: aa0803e4 mov x4, x8\n" + " 6c: d61f00e0 br x7\n" + " 70: f9400028 ldr x8, [x1]\n" + " 74: b9400108 ldr w8, [x8]\n" + " 78: d3689d08 lsl x8, x8, #24\n" + " 7c: d2804c10 mov x16, #0x260 // #608\n" + " 80: aa100108 orr x8, x8, x16\n" + " 84: f9005c08 str x8, [x0, #184]\n" + " 88: f9408048 ldr x8, [x2, #256]\n" + " 8c: aa0703e2 mov x2, x7\n" + " 90: d2800003 mov x3, #0x0 // #0\n" + " 94: d61f0100 br x8" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). move_to_vm_register_test_() -> + % ok = file:write_file("dump.bin", Stream), %% TODO: Implement AArch64 version []. From 9789e5c864d29c6dc23ef593dec32b4d22eb86fb Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 21 Jul 2025 23:21:54 +0200 Subject: [PATCH 15/46] AArch64: move_to_vm_register Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 248 +++++++-------------------- tests/libs/jit/jit_aarch64_tests.erl | 143 ++++++++++++++- tests/libs/jit/jit_x86_64_tests.erl | 2 +- 3 files changed, 206 insertions(+), 187 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 22944ae84..66c1f42a9 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1208,195 +1208,69 @@ set_args1(Arg, Reg) when is_integer(Arg) -> %% For now, just use the immediate (may need expansion later) jit_aarch64_asm:mov(Reg, Arg). 
-move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0} = State, 0, {x_reg, X} -) when - X < ?MAX_REG --> - I1 = jit_aarch64_asm:str(xzr, ?X_REG(X)), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0} = State, 0, {x_reg, extra} -) -> - I1 = jit_aarch64_asm:str(xzr, ?X_REG(?MAX_REG)), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; -move_to_vm_register(#state{stream_module = StreamModule, stream = Stream0} = State, 0, {ptr, Reg}) -> - I1 = jit_aarch64_asm:str(xzr, {Reg, 0}), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - N, - {x_reg, X} -) when - X < ?MAX_REG andalso ?IS_SINT32_T(N) --> - I1 = jit_aarch64_asm:mov(Temp, N), - I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - N, - {ptr, Reg} -) when - ?IS_SINT32_T(N) --> - I1 = jit_aarch64_asm:mov(Temp, N), - I2 = jit_aarch64_asm:str(Temp, {Reg, 0}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - N, - {x_reg, X} -) when - X < ?MAX_REG andalso is_integer(N) +%%----------------------------------------------------------------------------- +%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) +%% from an immediate, a native register or another vm register. 
+%% @end +%% @param State current backend state +%% @param Src value to move to vm register +%% @param Dest vm register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) -> + state(). +% Native register to VM register +move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) -> + I1 = jit_aarch64_asm:str(Src, ?X_REG(?MAX_REG)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) -> + I1 = jit_aarch64_asm:str(Src, ?X_REG(X)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> + I1 = jit_aarch64_asm:str(Src, {Reg, 0}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(#state{available_regs = [Temp | _]} = State0, Src, {y_reg, Y}) when + is_atom(Src) -> - I1 = jit_aarch64_asm:mov(Temp, N), - I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - N, - {ptr, Reg} -) when + I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), + I2 = jit_aarch64_asm:str(Src, {Temp, Y * 8}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + State0#state{stream = Stream1}; +% Source is an integer +move_to_vm_register(State, 0, Dest) -> + move_to_vm_register(State, xzr, Dest); +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when is_integer(N) -> I1 = jit_aarch64_asm:mov(Temp, N), - I2 = jit_aarch64_asm:str(Temp, {Reg, 0}), - Stream1 = StreamModule:append(Stream0, <>), - 
State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - 0, - {y_reg, Y} -) -> - I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:str(xzr, {Temp, Y * 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = - State, - N, - {y_reg, Y} -) when ?IS_SINT32_T(N) -> - I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_aarch64_asm:mov(Temp2, N), - I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = - State, - N, - {y_reg, X} -) when - X < 32 andalso is_integer(N) --> - I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_aarch64_asm:mov(Temp2, N), - I3 = jit_aarch64_asm:str(Temp2, {Temp1, X * 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {x_reg, X}, - {x_reg, Y} -) when X < ?MAX_REG andalso Y < ?MAX_REG -> + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +% Source is a VM register +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> + I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(?MAX_REG)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, 
Dest) -> I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)), - I2 = jit_aarch64_asm:str(Temp, ?X_REG(Y)), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {x_reg, X}, - {ptr, Reg} -) when X < ?MAX_REG -> - I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)), - I2 = jit_aarch64_asm:str(Temp, {Reg, 0}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = - State, - {x_reg, X}, - {y_reg, Y} -) when X < ?MAX_REG -> - I1 = jit_aarch64_asm:ldr(Temp1, ?X_REG(X)), - I2 = jit_aarch64_asm:ldr(Temp2, ?Y_REGS), - I3 = jit_aarch64_asm:str(Temp1, {Temp2, Y * 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {y_reg, Y}, - {x_reg, X} -) when X < ?MAX_REG -> - I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * 8}), - I3 = jit_aarch64_asm:str(Temp, ?X_REG(X)), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {y_reg, Y}, - {ptr, Reg} -) -> + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> + I1 = jit_aarch64_asm:ldr(Temp, {Reg, 0}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = 
AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * 8}), - I3 = jit_aarch64_asm:str(Temp, {Reg, 0}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = - State, - {y_reg, YS}, - {y_reg, YD} -) -> - I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp2, {Temp1, YS * 8}), - I3 = jit_aarch64_asm:str(Temp2, {Temp1, YD * 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0} = State, Reg, {x_reg, X} -) when is_atom(Reg) andalso X < ?MAX_REG -> - I1 = jit_aarch64_asm:str(Reg, ?X_REG(X)), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, stream = Stream0} = State, Reg, {ptr, Dest} -) when is_atom(Reg) -> - I1 = jit_aarch64_asm:str(Reg, {Dest, 0}), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State, - Reg, - {y_reg, Y} -) when is_atom(Reg) -> - I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:str(Reg, {Temp, Y * 8}), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}; -move_to_vm_register( - #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _], stream = Stream0} = - State, - {ptr, Reg}, - {y_reg, Y} -) when ?IS_GPR(Reg) -> - I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp2, {Reg, 0}), - I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * 8}), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}; + Stream1 = 
(State0#state.stream_module):append(State0#state.stream, <>), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; move_to_vm_register( #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State, Reg, @@ -1408,7 +1282,15 @@ move_to_vm_register( Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. -%% @doc move reg[x] to a vm or native register +%%----------------------------------------------------------------------------- +%% @doc Emit a move of an array element (reg[x]) to a vm or a native register. +%% @end +%% @param State current backend state +%% @param Reg base register of the array +%% @param Index index in the array, as an integer or a native register +%% @param Dest vm or native register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- -spec move_array_element( state(), aarch64_register(), diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 6e30f20e2..2aa5f43fc 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -506,10 +506,147 @@ call_fun_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +move_to_vm_register_test1(State, Source, Dest, Dump) -> + State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), + Stream = ?BACKEND:stream(State1), + ok = file:write_file("dump.bin", Stream), + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_vm_register_test0(State, Source, Dest, Dump) -> + State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + move_to_vm_register_test_() -> - % ok = file:write_file("dump.bin", Stream), - %% TODO: Implement AArch64 version - []. 
+ {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% Test: Immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, 0}, << + " 0: d2800547 mov x7, #0x2a // #42\n" + " 4: f9001807 str x7, [x0, #48]" + >>) + end), + %% Test: Immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 99, {ptr, r10}, << + " 0: d2800c67 mov x7, #0x63 // #99\n" + " 4: f9000147 str x7, [x10]" + >>) + end), + %% Test: x_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << + " 0: f9401c07 ldr x7, [x0, #56]\n" + " 4: f9002007 str x7, [x0, #64]" + >>) + end), + %% Test: x_reg to ptr + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r8}, << + " 0: f9401c07 ldr x7, [x0, #56]\n" + " 4: f9000107 str x7, [x8]" + >>) + end), + %% Test: ptr to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {ptr, r9}, {x_reg, 3}, << + " 0: f9400127 ldr x7, [x9]\n" + " 4: f9002407 str x7, [x0, #72]" + >>) + end), + %% Test: x_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401408 ldr x8, [x0, #40]\n" + " 8: f9000507 str x7, [x8, #8]" + >>) + end), + %% Test: y_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f94000e7 ldr x7, [x7]\n" + " 8: f9002407 str x7, [x0, #72]" + >>) + end), + %% Test: y_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f94004e7 ldr x7, [x7, #8]\n" + " 8: f9002407 str x7, [x0, #72]" + >>) + end), + %% Test: Native register to x_reg + ?_test(begin + move_to_vm_register_test0(State0, r10, {x_reg, 0}, << + " 0: f900180a str x10, [x0, #48]" + >>) + end), + %% Test: Native register to ptr + ?_test(begin + move_to_vm_register_test0(State0, r9, {ptr, r10}, << + " 0: 
f9000149 str x9, [x10]" + >>) + end), + %% Test: Native register to y_reg + ?_test(begin + move_to_vm_register_test0(State0, r10, {y_reg, 0}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f90000ea str x10, [x7]" + >>) + end), + %% Test: Large immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, 0}, << + " 0: d29bde07 mov x7, #0xdef0 // #57072\n" + " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" + " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" + " c: f2e24687 movk x7, #0x1234, lsl #48\n" + " 10: f9001807 str x7, [x0, #48]" + >>) + end), + %% Test: Large immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 16#123456789abcdef0, {ptr, r10}, << + " 0: d29bde07 mov x7, #0xdef0 // #57072\n" + " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" + " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" + " c: f2e24687 movk x7, #0x1234, lsl #48\n" + " 10: f9000147 str x7, [x10]" + >>) + end), + %% Test: x_reg to y_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << + " 0: f9405407 ldr x7, [x0, #168]\n" + " 4: f9401408 ldr x8, [x0, #40]\n" + " 8: f9007d07 str x7, [x8, #248]" + >>) + end), + %% Test: y_reg to x_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f9407ce7 ldr x7, [x7, #248]\n" + " 8: f9005407 str x7, [x0, #168]" + >>) + end), + %% Test: Negative immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, -1, {x_reg, 0}, << + " 0: 92800007 mov x7, #0xffffffffffffffff // #-1\n" + " 4: f9001807 str x7, [x0, #48]" + >>) + end) + ] + end}. 
move_array_element_test_() -> %% TODO: Implement AArch64 version diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 7ccb67889..c309cae9e 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -1269,7 +1269,7 @@ move_to_vm_register_test_() -> " 0: 49 89 02 mov %rax,(%r10)" >>) end), - %% Test: Atom register to y_reg + %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, rax, {y_reg, 0}, << " 0:\t48 8b 47 28 mov 0x28(%rdi),%rax\n" From 55c84c6ced68052b20dc9c0ac54e953a2b841a79 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 22 Jul 2025 07:48:10 +0200 Subject: [PATCH 16/46] AArch64: move_array_element Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 64 +++++++++++------ libs/jit/src/jit_aarch64_asm.erl | 37 ++++++++++ tests/libs/jit/jit_aarch64_asm_tests.erl | 16 ++++- tests/libs/jit/jit_aarch64_tests.erl | 92 +++++++++++++++++++++--- 4 files changed, 178 insertions(+), 31 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 66c1f42a9..6eed923d3 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1302,9 +1302,9 @@ move_array_element( Reg, Index, {x_reg, X} -) when X < ?MAX_REG andalso is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, Temp), - I2 = jit_x86_64_asm_unimplemented:movq(Temp, ?X_REG(X)), +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * 8}), + I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( @@ -1312,9 +1312,9 @@ move_array_element( Reg, Index, {ptr, Dest} -) when is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, Temp), - I2 = jit_x86_64_asm_unimplemented:movq(Temp, {0, Dest}), +) when is_atom(Reg) andalso is_integer(Index) -> + I1 = 
jit_aarch64_asm:ldr(Temp, {Reg, Index * 8}), + I2 = jit_aarch64_asm:str(Temp, {Dest, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( @@ -1323,7 +1323,7 @@ move_array_element( Reg, Index, {y_reg, Y} -) when is_integer(Index) -> +) when is_atom(Reg) andalso is_integer(Index) -> I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Temp2, {Reg, Index * 8}), I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * 8}), @@ -1337,9 +1337,9 @@ move_array_element( Index, {y_reg, Y} ) when is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, Reg), - I3 = jit_x86_64_asm_unimplemented:movq(Reg, {Y * 8, Temp}), + I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Reg, {Reg, Index * 8}), + I3 = jit_aarch64_asm:str(Reg, {Temp, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1361,14 +1361,36 @@ move_array_element( {free, IndexReg}, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(IndexReg) -> - I1 = jit_x86_64_asm_unimplemented:shlq(3, IndexReg), - I2 = jit_x86_64_asm_unimplemented:addq(Reg, IndexReg), - I3 = jit_x86_64_asm_unimplemented:movq({0, IndexReg}, IndexReg), - I4 = jit_x86_64_asm_unimplemented:movq(IndexReg, ?X_REG(X)), + I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), + I2 = jit_aarch64_asm:str(IndexReg, ?X_REG(X)), {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg ), - Stream1 = StreamModule:append(Stream0, <>), + Stream1 = StreamModule:append(Stream0, <>), + State#state{ + available_regs = AvailableRegs1, + available_fpregs = AvailableFPRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0, + available_fpregs = AvailableFPRegs0 + } = State, + Reg, + 
{free, IndexReg}, + {ptr, PtrReg} +) when is_atom(IndexReg) -> + I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), + I2 = jit_aarch64_asm:str(IndexReg, {PtrReg, 0}), + {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( + AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, available_fpregs = AvailableFPRegs1, @@ -1387,16 +1409,14 @@ move_array_element( {free, IndexReg}, {y_reg, Y} ) when ?IS_GPR(IndexReg) -> - I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm_unimplemented:shlq(3, IndexReg), - I3 = jit_x86_64_asm_unimplemented:addq(Reg, IndexReg), - I4 = jit_x86_64_asm_unimplemented:movq({0, IndexReg}, IndexReg), - I5 = jit_x86_64_asm_unimplemented:movq(IndexReg, {Y * 8, Temp}), + I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), + I3 = jit_aarch64_asm:str(IndexReg, {Temp, Y * 8}), {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg ), Stream1 = StreamModule:append( - Stream0, <> + Stream0, <> ), State#state{ available_regs = AvailableRegs1, @@ -1418,7 +1438,7 @@ get_array_element( Reg, Index ) -> - I1 = jit_x86_64_asm_unimplemented:movq({Index * 8, Reg}, ElemReg), + I1 = jit_aarch64_asm:ldr(ElemReg, {Reg, Index * 8}), Stream1 = StreamModule:append(Stream0, <>), { State#state{ diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 33e1e4252..ef55c9997 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -20,6 +20,7 @@ -export([ add/3, + add/4, b/1, bcc/2, blr/1, @@ -83,6 +84,23 @@ add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm = %% 0x91000000 | Imm << 10 | Rn << 5 | Rd <<(16#91000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>. 
+%% ADD (shifted register) +%% ADD Rd, Rn, Rm, {lsl, #amount} +-spec add(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), {lsl, 0..63}) -> + binary(). +add(Rd, Rn, Rm, {lsl, Amount}) when + is_atom(Rd), is_atom(Rn), is_atom(Rm), is_integer(Amount), Amount >= 0, Amount =< 63 +-> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + %% AArch64 ADD (immediate) encoding: 1001000100iiiiiiiiiiiinnnnndddddd + %% 0x8B000000 | Rm << 16 | Amount << 10 | Rn << 5 | Rd + << + (16#8B000000 bor (RmNum bsl 16) bor ((Amount band 16#3F) bsl 10) bor (RnNum bsl 5) bor + RdNum):32/little + >>. + %% Emit an unconditional branch (B) to a 32-bit relative offset (AArch64 encoding) %% offset is in bytes, relative to the next instruction -spec b(integer()) -> binary(). @@ -134,6 +152,25 @@ ldr(Dst, {BaseReg, Offset}) when %% 0xf9400000 | (Offset div 8) << 10 | BaseReg << 5 | Dst << (16#F9400000 bor ((Offset div 8) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little + >>; +ldr(Xt, {Xn, Xm}) when + is_atom(Xt), + is_atom(Xn), + is_atom(Xm) +-> + ldr(Xt, {Xn, Xm, lsl, 0}); +ldr(Xt, {Xn, Xm, lsl, Amount}) when + is_atom(Xt), + is_atom(Xn), + is_atom(Xm), + Amount =:= 0 orelse Amount =:= 3 +-> + XtNum = reg_to_num(Xt), + XnNum = reg_to_num(Xn), + XmNum = reg_to_num(Xm), + S = Amount div 3, + << + (16#F8606800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little >>. 
%% Emit a load register (LDR) instruction for 32-bit load from memory (AArch64 encoding) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 81a437392..f7a3e187b 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -28,7 +28,8 @@ add_test_() -> [ ?_assertEqual(<<16#9100e0e7:32/little>>, jit_aarch64_asm:add(r7, r7, 56)), ?_assertEqual(<<16#91000000:32/little>>, jit_aarch64_asm:add(r0, r0, 0)), - ?_assertEqual(<<16#91000421:32/little>>, jit_aarch64_asm:add(r1, r1, 1)) + ?_assertEqual(<<16#91000421:32/little>>, jit_aarch64_asm:add(r1, r1, 1)), + ?_assertEqual(<<16#8b031041:32/little>>, jit_aarch64_asm:add(r1, r2, r3, {lsl, 4})) ]. b_test_() -> @@ -76,6 +77,19 @@ ldr_test_() -> ?_assertEqual( <<16#F84007E7:32/little>>, jit_aarch64_asm:ldr(r7, {sp}, 0) + ), + % shift + ?_assertEqual( + <<16#f8637841:32/little>>, + jit_aarch64_asm:ldr(r1, {r2, r3, lsl, 3}) + ), + ?_assertEqual( + <<16#f8677907:32/little>>, + jit_aarch64_asm:ldr(r7, {r8, r7, lsl, 3}) + ), + ?_assertEqual( + <<16#f8636841:32/little>>, + jit_aarch64_asm:ldr(r1, {r2, r3}) ) ]. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 2aa5f43fc..36c48fa9f 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -506,12 +506,6 @@ call_fun_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -move_to_vm_register_test1(State, Source, Dest, Dump) -> - State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), - Stream = ?BACKEND:stream(State1), - ok = file:write_file("dump.bin", Stream), - ?assertEqual(dump_to_bin(Dump), Stream). - move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), Stream = ?BACKEND:stream(State1), @@ -648,9 +642,91 @@ move_to_vm_register_test_() -> ] end}. 
+move_array_element_test0(State, Reg, Index, Dest, Dump) -> + State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + move_array_element_test_() -> - %% TODO: Implement AArch64 version - []. + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, r8, 2, {x_reg, 0}, << + " 0: f9400907 ldr x7, [x8, #16]\n" + " 4: f9001807 str x7, [x0, #48]" + >>) + end), + %% move_array_element: reg[x] to ptr + ?_test(begin + move_array_element_test0(State0, r8, 3, {ptr, r10}, << + " 0: f9400d07 ldr x7, [x8, #24]\n" + " 4: f9000147 str x7, [x10]" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, r8, 1, {y_reg, 2}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f9400508 ldr x8, [x8, #8]\n" + " 8: f90008e8 str x8, [x7, #16]" + >>) + end), + %% move_array_element: reg[x] to native reg (r10) + ?_test(begin + move_array_element_test0(State0, r8, 1, r10, << + " 0: f940050a ldr x10, [x8, #8]" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, r8, 7, {y_reg, 31}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f9401d08 ldr x8, [x8, #56]\n" + " 8: f9007ce8 str x8, [x7, #248]" + >>) + end), + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, r8, 7, {x_reg, 15}, << + " 0: f9401d07 ldr x7, [x8, #56]\n" + " 4: f9005407 str x7, [x0, #168]" + >>) + end), + %% move_array_element: reg_x[reg_y] to x_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + move_array_element_test0(State1, r8, {free, Reg}, {x_reg, 2}, << + " 0: f9401107 ldr x7, [x8, #32]\n" + " 4: f8677907 ldr x7, [x8, x7, lsl #3]\n" + " 8: f9002007 str x7, [x0, #64]" + >>) + end), + %% move_array_element: reg_x[reg_y] to 
pointer (large x reg) + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + move_array_element_test0(State1, r8, {free, Reg}, {ptr, r10}, << + " 0: f9401107 ldr x7, [x8, #32]\n" + " 4: f8677907 ldr x7, [x8, x7, lsl #3]\n" + " 8: f9000147 str x7, [x10]" + >>) + end), + %% move_array_element: reg_x[reg_y] to y_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + move_array_element_test0(State1, r8, {free, Reg}, {y_reg, 31}, << + " 0: f9401107 ldr x7, [x8, #32]\n" + " 4: f9401408 ldr x8, [x0, #40]\n" + " 8: f8677907 ldr x7, [x8, x7, lsl #3]\n" + " c: f9007d07 str x7, [x8, #248]" + >>) + end) + ] + end}. get_array_element_test_() -> %% TODO: Implement AArch64 version From 8750b489e8c15ea6471fa11b0ef0f2eaff8b4a4f Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 22 Jul 2025 23:04:49 +0200 Subject: [PATCH 17/46] AArch64: further tests Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 203 ++++++++--------------- libs/jit/src/jit_aarch64_asm.erl | 29 +++- tests/libs/jit/jit_aarch64_asm_tests.erl | 5 + tests/libs/jit/jit_aarch64_tests.erl | 158 +++++++++++++++++- 4 files changed, 254 insertions(+), 141 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 6eed923d3..4c5db0f4c 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -161,8 +161,8 @@ {aarch64_register(), '<', 0} | {maybe_free_aarch64_register(), '==', 0} | {maybe_free_aarch64_register(), '!=', integer()} - | {'(uint8_t)', maybe_free_aarch64_register(), '==', false} - | {'(uint8_t)', maybe_free_aarch64_register(), '!=', false} + | {'(bool)', maybe_free_aarch64_register(), '==', false} + | {'(bool)', maybe_free_aarch64_register(), '!=', false} | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', 0}. 
% ctx->e is 0x28 @@ -776,7 +776,7 @@ if_block_cond( {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {'(uint8_t)', RegOrTuple, '==', false} + {'(bool)', RegOrTuple, '==', false} ) -> Reg = case RegOrTuple of @@ -796,7 +796,7 @@ if_block_cond( {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {'(uint8_t)', RegOrTuple, '!=', false} + {'(bool)', RegOrTuple, '!=', false} ) -> Reg = case RegOrTuple of @@ -884,7 +884,7 @@ if_block_cond( stream_module = StreamModule, stream = Stream0 } = State0, - {'(uint8_t)', RegOrTuple, '&', Val} + {'(bool)', RegOrTuple, '&', Val} ) -> Reg = case RegOrTuple of @@ -1196,16 +1196,12 @@ set_args1({ptr, Source}, Reg) -> jit_aarch64_asm:ldr(Reg, {Source, 0}); set_args1({y_reg, X}, Reg) -> [ - jit_aarch64_asm:mov(Reg, ?Y_REGS), - jit_aarch64_asm:mov(Reg, {X * 8, Reg}) + jit_aarch64_asm:ldr(Reg, ?Y_REGS), + jit_aarch64_asm:ldr(Reg, {Reg, X * 8}) ]; set_args1(ArgReg, Reg) when ?IS_GPR(ArgReg) -> jit_aarch64_asm:mov(Reg, ArgReg); -set_args1(Arg, Reg) when is_integer(Arg) andalso Arg >= -16#80000000 andalso Arg < 16#80000000 -> - jit_aarch64_asm:mov(Reg, Arg); set_args1(Arg, Reg) when is_integer(Arg) -> - %% For large immediates, we need a more complex sequence in AArch64 - %% For now, just use the immediate (may need expansion later) jit_aarch64_asm:mov(Reg, Arg). %%----------------------------------------------------------------------------- @@ -1452,140 +1448,66 @@ get_array_element( state(), integer() | vm_register() | aarch64_register(), aarch64_register(), non_neg_integer() ) -> state(). 
move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {x_reg, X}, - Reg, - Index -) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movq(?X_REG(X), Temp), - I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {x_reg, X}, - Reg, - IndexReg -) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> - I1 = jit_x86_64_asm_unimplemented:movq(?X_REG(X), Temp), - I2 = jit_x86_64_asm_unimplemented:movq(Temp, {0, Reg, IndexReg, 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {ptr, Source}, - Reg, - Index -) -> - I1 = jit_x86_64_asm_unimplemented:movq({0, Source}, Temp), - I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = - State, - {y_reg, Y}, + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, Reg, Index -) when ?IS_GPR(Reg) andalso is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Temp}, Temp), - I3 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}; +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + I1 = jit_aarch64_asm:str(ValueReg, {Reg, Index * 8}), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; 
move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = - State, - {y_reg, Y}, + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, Reg, IndexReg -) when ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> - I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Temp}, Temp), - I3 = jit_x86_64_asm_unimplemented:movq(Temp, {0, Reg, IndexReg, 8}), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}; -move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0} = State, Source, Reg, Index -) when ?IS_GPR(Source) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movq(Source, {Index * 8, Reg}), +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + I1 = jit_aarch64_asm:str(ValueReg, {Reg, IndexReg, lsl, 3}), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; -move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0} = State, Source, Reg, Index -) when ?IS_SINT32_T(Source) andalso is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movq(Source, {Index * 8, Reg}), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - Source, + State0, + Value, Reg, Index -) when is_integer(Source) andalso is_integer(Index) -> - I1 = jit_x86_64_asm_unimplemented:movabsq(Source, Temp), - I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Index * 8, Reg}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}. +) -> + {State1, Temp} = copy_to_native_register(State0, Value), + State2 = move_to_array_element(State1, Temp, Reg, Index), + free_native_register(State2, Temp). 
move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {x_reg, X}, + State, + Value, BaseReg, IndexReg, Offset -) when X < ?MAX_REG andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_x86_64_asm_unimplemented:movq(?X_REG(X), Temp), - I2 = jit_x86_64_asm_unimplemented:movq(Temp, {Offset, BaseReg, IndexReg, 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; +) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> + move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8)); move_to_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, - {y_reg, Y}, + ValueReg, BaseReg, IndexReg, Offset -) when ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Temp}, Temp), - I3 = jit_x86_64_asm_unimplemented:movq(Temp, {Offset, BaseReg, IndexReg, 8}), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}; -move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0} = State, - Source, - BaseReg, - IndexReg, - Offset -) when - ?IS_GPR(Source) andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) --> - I1 = jit_x86_64_asm_unimplemented:movq(Source, {Offset, BaseReg, IndexReg, 8}), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; -move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0} = State, - Source, - BaseReg, - IndexReg, - Offset -) when - ?IS_SINT32_T(Source) andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso - is_integer(Offset) --> - I1 = jit_x86_64_asm_unimplemented:movq(Source, {Offset, BaseReg, IndexReg, 8}), - Stream1 = StreamModule:append(Stream0, I1 / binary), +) when ?IS_GPR(ValueReg) 
andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + I1 = jit_aarch64_asm:add(Temp, BaseReg, Offset), + I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, IndexReg, lsl, 3}), + Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( - State, - Source, + State0, + Value, BaseReg, IndexReg, Offset -) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> - move_to_array_element(State, Source, BaseReg, IndexReg + (Offset div 8)). +) -> + {State1, ValueReg} = copy_to_native_register(State0, Value), + [Temp | _] = State1#state.available_regs, + I1 = jit_aarch64_asm:add(Temp, BaseReg, Offset), + I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, IndexReg, lsl, 3}), + Stream1 = (State1#state.stream_module):append(State1#state.stream, <>), + State2 = State1#state{stream = Stream1}, + free_native_register(State2, ValueReg). -spec move_to_native_register(state(), value()) -> {state(), aarch64_register()}. move_to_native_register(State, Reg) when is_atom(Reg) -> @@ -1593,7 +1515,7 @@ move_to_native_register(State, Reg) when is_atom(Reg) -> move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} ) when is_atom(Reg) -> - I1 = jit_x86_64_asm_unimplemented:movq({0, Reg}, Reg), + I1 = jit_aarch64_asm:ldr(Reg, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1}, Reg}; move_to_native_register( @@ -1607,7 +1529,7 @@ move_to_native_register( ) when is_integer(Imm) -> - I1 = jit_x86_64_asm_unimplemented:movq(Imm, Reg), + I1 = jit_aarch64_asm:mov(Reg, Imm), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register( @@ -1633,8 +1555,8 @@ move_to_native_register( } = State, {y_reg, Y} ) -> - I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, Reg), - I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, Reg}, Reg), + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = 
jit_aarch64_asm:ldr(Reg, {Reg, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; @@ -1658,13 +1580,13 @@ move_to_native_register( move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst ) when is_atom(RegSrc) orelse is_integer(RegSrc) -> - I = jit_x86_64_asm_unimplemented:movq(RegSrc, RegDst), + I = jit_aarch64_asm:mov(RegDst, RegSrc), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst -) when is_atom(Reg) -> - I1 = jit_x86_64_asm_unimplemented:movq({0, Reg}, RegDst), +) when ?IS_GPR(Reg) -> + I1 = jit_aarch64_asm:ldr(RegDst, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_native_register( @@ -1672,14 +1594,14 @@ move_to_native_register( ) when X < ?MAX_REG -> - I1 = jit_x86_64_asm_unimplemented:movq(?X_REG(X), RegDst), + I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst ) -> - I1 = jit_x86_64_asm_unimplemented:movq(?Y_REGS, RegDst), - I2 = jit_x86_64_asm_unimplemented:movq({Y * 8, RegDst}, RegDst), + I1 = jit_aarch64_asm:ldr(RegDst, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(RegDst, {RegDst, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1720,7 +1642,7 @@ copy_to_native_register( } = State, {ptr, Reg} ) when is_atom(Reg) -> - I1 = jit_x86_64_asm_unimplemented:movq({0, Reg}, SaveReg), + I1 = jit_aarch64_asm:ldr(SaveReg, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; copy_to_native_register(State, Reg) -> @@ -1805,8 
+1727,17 @@ and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) - State#state{stream = Stream1}. or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm_unimplemented:orq(Val, Reg), - Stream1 = StreamModule:append(Stream0, I1), + Stream1 = + try + I = jit_aarch64_asm:orr(Reg, Reg, Val), + StreamModule:append(Stream0, I) + catch + error:{unencodable_immediate, Val} -> + [Temp | _] = State#state.available_regs, + I1 = jit_aarch64_asm:mov(Temp, Val), + I2 = jit_aarch64_asm:orr(Reg, Reg, Temp), + StreamModule:append(Stream0, <>) + end, State#state{stream = Stream1}. add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> @@ -1960,8 +1891,8 @@ return_labels_and_lines( SortedLabels, SortedLines ) -> - I2 = jit_x86_64_asm_unimplemented:retq(), - {_RewriteLEAOffset, I1} = jit_x86_64_asm_unimplemented:leaq_rel32({byte_size(I2), rip}, rax), + I1 = jit_aarch64_asm:adr(r0, 8), + I2 = jit_aarch64_asm:ret(), LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, LinesTable = <<<> || {Line, Offset} <- SortedLines>>, Stream1 = StreamModule:append( diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index ef55c9997..43a1eee56 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -492,7 +492,21 @@ orr(DstReg, Rn, Rm) when is_atom(DstReg), is_atom(Rn), is_atom(Rm) -> %% 10101010000mmmmm000000nnnnndddddd (64-bit) << (16#AA000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor DstNum):32/little - >>. 
+ >>; +orr(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + case encode_bitmask_immediate(Imm) of + {ok, N, Immr, Imms} -> + % OR immediate encoding: sf=1(64b) 01(op) 100100 N immr imms Rn Rd + Opcode = 16#B2000000, + Instr = + Opcode bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5) bor + RdNum, + <>; + error -> + error({unencodable_immediate, Imm}) + end. %% Emit a store register (STR) instruction for 64-bit store to memory -spec str(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary(). @@ -510,6 +524,19 @@ str(SrcReg, {BaseReg, Offset}) when %% 0xf9000000 | (Offset div 8) << 10 | BaseReg << 5 | SrcReg << (16#F9000000 bor ((Offset div 8) bsl 10) bor (BaseNum bsl 5) bor SrcNum):32/little + >>; +str(Xt, {Xn, Xm, lsl, Amount}) when + is_atom(Xt), + is_atom(Xn), + is_atom(Xm), + Amount =:= 0 orelse Amount =:= 3 +-> + XtNum = reg_to_num(Xt), + XnNum = reg_to_num(Xn), + XmNum = reg_to_num(Xm), + S = Amount div 3, + << + (16#F8206800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little >>. %% Emit a store register (STR) instruction for 64-bit store to memory, with store-update (writeback) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index f7a3e187b..4353efd10 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -157,6 +157,11 @@ str_test_() -> ?_assertEqual( <<16#F80007E7:32/little>>, jit_aarch64_asm:str(r7, {sp}, 0) + ), + % shift + ?_assertEqual( + <<16#f8237841:32/little>>, + jit_aarch64_asm:str(r1, {r2, r3, lsl, 3}) ) ]. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 36c48fa9f..eb6505b6c 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -729,13 +729,163 @@ move_array_element_test_() -> end}. 
get_array_element_test_() -> - %% TODO: Implement AArch64 version - []. + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% get_array_element: reg[x] to new native reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401107 ldr x7, [x8, #32]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r7, Reg) + end) + ] + end}. move_to_array_element_test_() -> - %% TODO: Implement AArch64 version - []. + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_array_element/4: x_reg to reg[x] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9000907 str x7, [x8, #16]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: x_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: ptr to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {ptr, r7}, r8, r9), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f94000e7 ldr x7, [x7]\n" + " 4: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: y_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r8, r9), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f94008e7 ldr x7, [x7, #16]\n" + " 8: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% 
move_to_array_element/5: x_reg to reg[x+offset] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2, 1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9000907 str x7, [x8, #16]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: x_reg to reg[x+offset] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9, 1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 91000508 add x8, x8, #0x1\n" + " 8: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: imm to reg[x+offset] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, 42, r8, r9, 1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: d2800547 mov x7, #0x2a // #42\n" + " 4: 91000508 add x8, x8, #0x1\n" + " 8: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. 
+move_to_native_register_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_native_register/3: imm to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, 42, r8), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: d2800548 mov x8, #0x2a // #42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: reg to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, r7, r8), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: aa0703e8 mov x8, x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {ptr, reg} to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {ptr, r7}, r8), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f94000e8 ldr x8, [x7]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {x_reg, x} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r8), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9402008 ldr x8, [x0, #64]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {y_reg, y} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r8), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401408 ldr x8, [x0, #40]\n" + " 4: f9400908 ldr x8, [x8, #16]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). 
From aeb33d10a9045bdc36e772a7e9e44939e62d2c58 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 24 Jul 2025 19:02:36 +0200 Subject: [PATCH 18/46] AArch64: Fix encoding of tst instruction Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 8 +-- libs/jit/src/jit_aarch64_asm.erl | 60 +++++++++---------- tests/libs/jit/jit_aarch64_asm_tests.erl | 76 ++++++++++++++++++++---- 3 files changed, 97 insertions(+), 47 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 4c5db0f4c..744c74ea4 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -672,7 +672,7 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I1 = jit_aarch64_asm:tst32(Reg, Reg), + I1 = jit_aarch64_asm:tst_w(Reg, Reg), I2 = jit_aarch64_asm:bcc(ne, 0), Code = << I1/binary, @@ -784,7 +784,7 @@ if_block_cond( RegOrTuple -> RegOrTuple end, % Test low 8 bits - I1 = jit_aarch64_asm:tst32(Reg, 16#FF), + I1 = jit_aarch64_asm:tst_w(Reg, 16#FF), I2 = jit_aarch64_asm:bcc(ne, 0), Code = << I1/binary, @@ -804,7 +804,7 @@ if_block_cond( RegOrTuple -> RegOrTuple end, % Test low 8 bits - I1 = jit_aarch64_asm:tst32(Reg, 16#FF), + I1 = jit_aarch64_asm:tst_w(Reg, 16#FF), I2 = jit_aarch64_asm:bcc(eq, 0), Code = << I1/binary, @@ -892,7 +892,7 @@ if_block_cond( RegOrTuple -> RegOrTuple end, % Test 8-bit value - I1 = jit_aarch64_asm:tst32(Reg, Val), + I1 = jit_aarch64_asm:tst_w(Reg, Val), I2 = jit_aarch64_asm:bcc(eq, 0), Code = << I1/binary, diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 43a1eee56..f4023473f 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -42,7 +42,7 @@ str/2, str/3, tst/2, - tst32/2, + tst_w/2, stp/4, ldp/4, subs/3, @@ -355,7 +355,7 @@ orr_immediate(Dst, N, Immr, Imms) when %% Encode a value as AArch64 bitmask immediate %% Returns {ok, N, Immr, Imms} if encodable, error otherwise --spec encode_bitmask_immediate(integer()) -> {ok, integer(), 
integer(), integer()} | error. +-spec encode_bitmask_immediate(integer()) -> {ok, 0..1, integer(), integer()} | error. encode_bitmask_immediate(Value) when is_integer(Value) -> %% Convert to 64-bit unsigned UnsignedValue = Value band 16#FFFFFFFFFFFFFFFF, @@ -364,6 +364,20 @@ encode_bitmask_immediate(Value) when is_integer(Value) -> PatternSizes = [64, 32, 16, 8, 4, 2], try_pattern_sizes(UnsignedValue, PatternSizes). +%% Encode a value as AArch64 bitmask immediate for 32 bits values +%% Returns {ok, Immr, Imms} if encodable, error otherwise +-spec encode_bitmask_immediate_w(integer()) -> {ok, integer(), integer()} | error. +encode_bitmask_immediate_w(Value) when is_integer(Value) -> + %% Convert to 64-bit unsigned + UnsignedValue = Value band 16#FFFFFFFFFFFFFFFF, + + %% Try different pattern sizes (32, 16, 8, 4, 2) + PatternSizes = [32, 16, 8, 4, 2], + case try_pattern_sizes(UnsignedValue, PatternSizes) of + {ok, 0, Immr, Imms} -> {ok, Immr, Imms}; + error -> error + end. + %% Try encoding with different pattern sizes -spec try_pattern_sizes(integer(), [integer()]) -> {ok, integer(), integer(), integer()} | error. try_pattern_sizes(_, []) -> @@ -817,46 +831,30 @@ tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> <<(16#EA00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; tst(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> RnNum = reg_to_num(Rn), - case Imm of - %% special case for #16 - 16 -> - <<(16#F27C001F bor (RnNum bsl 5)):32/little>>; + case encode_bitmask_immediate(Imm) of + {ok, N, Immr, Imms} -> + << + (16#F200001F bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little + >>; _ -> - if - Imm band (Imm - 1) =:= 0, Imm > 0, Imm =< 16#8000000000000000 -> - BitPos = trunc(math:log2(Imm)), - <<(16#F200001F bor (BitPos bsl 16) bor (RnNum bsl 5)):32/little>>; - true -> - << - (16#F200001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little - >> - end + error({unencodable_immediate, Imm}) end. 
%% Emit a 32-bit test instruction (bitwise AND, discarding result) --spec tst32(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). -tst32(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> +-spec tst_w(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). +tst_w(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> RnNum = reg_to_num(Rn), RmNum = reg_to_num(Rm), %% AArch64 TST (32-bit shifted register) encoding: TST Wn, Wm %% This is ANDS WZR, Wn, Wm: 01101010000mmmmm000000nnnnn11111 <<(16#6A00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; -tst32(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> +tst_w(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> RnNum = reg_to_num(Rn), - case Imm of - %% special case for #16 - 16 -> - <<(16#721C001F bor (RnNum bsl 5)):32/little>>; + case encode_bitmask_immediate_w(Imm) of + {ok, Immr, Imms} -> + <<(16#7200001F bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little>>; _ -> - if - Imm band (Imm - 1) =:= 0, Imm > 0, Imm =< 16#80000000 -> - BitPos = trunc(math:log2(Imm)), - <<(16#7200001F bor (BitPos bsl 16) bor (RnNum bsl 5)):32/little>>; - true -> - << - (16#7200001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little - >> - end + error({unencodable_immediate, Imm}) end. 
%% Emit a subtract and set flags (SUBS) instruction (AArch64 encoding) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 4353efd10..15d9d1efb 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -202,30 +202,40 @@ lsl_test_() -> lsr_test_() -> [ - ?_assertEqual(<<16#D340FC00:32/little>>, jit_aarch64_asm:lsr(r0, r0, 0)), - ?_assertEqual(<<16#D340FC01:32/little>>, jit_aarch64_asm:lsr(r1, r0, 0)), + ?_assertEqual(asm(<<16#D340FC00:32/little>>, "lsr x0, x0, 0"), jit_aarch64_asm:lsr(r0, r0, 0)), + ?_assertEqual(asm(<<16#D340FC01:32/little>>, "lsr x1, x0, 0"), jit_aarch64_asm:lsr(r1, r0, 0)), ?_assertEqual(<<16#D360FC00:32/little>>, jit_aarch64_asm:lsr(r0, r0, 32)) ]. ret_test_() -> [ - ?_assertEqual(<<16#D65F03C0:32/little>>, jit_aarch64_asm:ret()) + ?_assertEqual(asm(<<16#D65F03C0:32/little>>, "ret"), jit_aarch64_asm:ret()) ]. tst_test_() -> [ - % TST reg, reg - ?_assertEqual(<<16#EA01001F:32/little>>, jit_aarch64_asm:tst(r0, r1)), - % TST reg, imm (power of 2) - ?_assertEqual(<<16#F27C001F:32/little>>, jit_aarch64_asm:tst(r0, 16)) + ?_assertEqual(asm(<<16#EA01001F:32/little>>, "tst x0, x1"), jit_aarch64_asm:tst(r0, r1)), + ?_assertEqual(asm(<<16#f240003f:32/little>>, "tst x1, #1"), jit_aarch64_asm:tst(r1, 1)), + ?_assertEqual(asm(<<16#f27c005f:32/little>>, "tst x2, #16"), jit_aarch64_asm:tst(r2, 16)), + ?_assertEqual(asm(<<16#f2401c7f:32/little>>, "tst x3, #255"), jit_aarch64_asm:tst(r3, 255)), + ?_assertEqual(asm(<<16#f240249f:32/little>>, "tst x4, #1023"), jit_aarch64_asm:tst(r4, 1023)), + ?_assertEqual(asm(<<16#f24014bf:32/little>>, "tst x5, #63"), jit_aarch64_asm:tst(r5, 63)), + ?_assertEqual(asm(<<16#f27b00df:32/little>>, "tst x6, #32"), jit_aarch64_asm:tst(r6, 32)), + ?_assertEqual(asm(<<16#f27a00ff:32/little>>, "tst x7, #64"), jit_aarch64_asm:tst(r7, 64)), + ?_assertEqual(asm(<<16#f27e051f:32/little>>, "tst x8, #0xc"), jit_aarch64_asm:tst(r8, 16#c)) ]. 
-tst32_test_() -> +tst_w_test_() -> [ - % TST32 reg, reg - ?_assertEqual(<<16#6A01001F:32/little>>, jit_aarch64_asm:tst32(r0, r1)), - % TST32 reg, imm (power of 2) - ?_assertEqual(<<16#721C001F:32/little>>, jit_aarch64_asm:tst32(r0, 16)) + ?_assertEqual(asm(<<16#6a01001f:32/little>>, "tst w0, w1"), jit_aarch64_asm:tst_w(r0, r1)), + ?_assertEqual(asm(<<16#7200003f:32/little>>, "tst w1, #1"), jit_aarch64_asm:tst_w(r1, 1)), + ?_assertEqual(asm(<<16#721c005f:32/little>>, "tst w2, #16"), jit_aarch64_asm:tst_w(r2, 16)), + ?_assertEqual(asm(<<16#72001c7f:32/little>>, "tst w3, #255"), jit_aarch64_asm:tst_w(r3, 255)), + ?_assertEqual(asm(<<16#7200249f:32/little>>, "tst w4, #1023"), jit_aarch64_asm:tst_w(r4, 1023)), + ?_assertEqual(asm(<<16#720014bf:32/little>>, "tst w5, #63"), jit_aarch64_asm:tst_w(r5, 63)), + ?_assertEqual(asm(<<16#721b00df:32/little>>, "tst w6, #32"), jit_aarch64_asm:tst_w(r6, 32)), + ?_assertEqual(asm(<<16#721a00ff:32/little>>, "tst w7, #64"), jit_aarch64_asm:tst_w(r7, 64)), + ?_assertEqual(asm(<<16#721e051f:32/little>>, "tst w8, #0xc"), jit_aarch64_asm:tst_w(r8, 16#c)) ]. bcc_test_() -> @@ -297,3 +307,45 @@ adr_test_() -> %% ADR with offset not a multiple of 4 is valid ?_assertEqual(<<16#70000000:32/little>>, jit_aarch64_asm:adr(r0, 3)) ]. + +asm(Bin, Str) -> + case erlang:system_info(machine) of + "ATOM" -> Bin; + "BEAM" -> + case os:cmd("which aarch64-elf-as") of + [] -> Bin; + _ -> + ok = file:write_file("test.S", Str ++ "\n"), + Dump = os:cmd("aarch64-elf-as -c test.S -o test.o && aarch64-elf-objdump -D test.o"), + DumpBin = list_to_binary(Dump), + DumpLines = binary:split(DumpBin, <<"\n">>, [global]), + AsmBin = asm_lines(DumpLines, <<>>), + if + AsmBin =:= Bin -> ok; + true -> + io:format("-------------------------------------------\n" + "~s\n" + "-------------------------------------------\n", [Dump]) + end, + ?assertEqual(AsmBin, Bin), + Bin + end + end. 
+ +asm_lines([<<" ", Tail/binary>> | T], Acc) -> + [_Offset, HexStr0] = binary:split(Tail, <<":\t">>), + [HexStr, _] = binary:split(HexStr0, <<"\t">>), + AssembledBin = hex_to_bin(HexStr, <<>>), + asm_lines(T, <>); +asm_lines([_OtherLine | T], Acc) -> + asm_lines(T, Acc); +asm_lines([], Acc) -> + Acc. + +hex_to_bin(<<>>, Acc) -> Acc; +hex_to_bin(HexStr, Acc) -> + [HexChunk, Rest] = binary:split(HexStr, <<" ">>), + NumBits = byte_size(HexChunk) * 4, + HexVal = binary_to_integer(HexChunk, 16), + NewAcc = <>, + hex_to_bin(Rest, NewAcc). From 87a4098003aeebde6f28f506359425ed5cc6876c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 24 Jul 2025 20:05:47 +0200 Subject: [PATCH 19/46] AArch64: handle mmap specificities of Apple Silicon Also fix error handling of mmap in mapped_file Signed-off-by: Paul Guyot --- src/libAtomVM/defaultatoms.def | 1 + src/libAtomVM/jit.h | 5 ++++ src/libAtomVM/module.c | 3 ++- src/libAtomVM/nifs.c | 2 ++ src/libAtomVM/sys.h | 12 +++++++++ src/platforms/generic_unix/lib/mapped_file.c | 16 +++++++++--- src/platforms/generic_unix/lib/sys.c | 27 ++++++++++++++++++++ 7 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 6ce852332..04aff1f84 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -207,3 +207,4 @@ X(EMU_FLAVOR_ATOM, "\xA", "emu_flavor") X(CODE_SERVER_ATOM, "\xB", "code_server") X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") +X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index a3d8a8fb7..ba0618c5f 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -163,6 +163,11 @@ enum TrapAndLoadResult #define JIT_JUMPTABLE_ENTRY_SIZE 5 #endif +#ifdef __arm64__ +#define JIT_ARCH_TARGET JIT_ARCH_AARCH64 +#define JIT_JUMPTABLE_ENTRY_SIZE 4 +#endif + /** * @brief Return the entry point from a given jit stream * diff --git a/src/libAtomVM/module.c 
b/src/libAtomVM/module.c index a5f5c907e..bbfd83993 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -338,7 +338,8 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary for (int arch_index = 0; arch_index < ENDIAN_SWAP_16(native_code->architectures_count); arch_index++) { if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == JIT_VARIANT_PIC) { size_t offset = ENDIAN_SWAP_32(native_code->info_size) + ENDIAN_SWAP_32(native_code->architectures[arch_index].offset) + sizeof(native_code->info_size); - module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), (ModuleNativeEntryPoint) ((const uint8_t *) &native_code->info_size + offset)); + ModuleNativeEntryPoint module_entry_point = sys_map_native_code((const uint8_t *) &native_code->info_size, ENDIAN_SWAP_32(native_code->size), offset); + module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), module_entry_point); break; } } diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 66135d8cf..ccccc7621 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5678,6 +5678,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) #if JIT_ARCH_TARGET == JIT_ARCH_X86_64 return JIT_X86_64_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 + return JIT_AARCH64_ATOM; #else #error Unknown JIT target #endif diff --git a/src/libAtomVM/sys.h b/src/libAtomVM/sys.h index 72d1d1788..a79688193 100644 --- a/src/libAtomVM/sys.h +++ b/src/libAtomVM/sys.h @@ -284,6 +284,18 @@ void sys_init_platform(GlobalContext *global); */ void sys_free_platform(GlobalContext *global); +/** + * @brief Map precompiled native code to a module entry point. + * + * @details If mmap module is executable, returns native_code + offset. + * Otherwise (Apple Silicon) copy it to an executable buffer. Only implemented + * on platforms with JIT. 
+ * @param native_code pointer to native code chunk + * @param size size of native code chunk + * @param offset offset to the module entry point + */ +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset); + #ifdef __cplusplus } #endif diff --git a/src/platforms/generic_unix/lib/mapped_file.c b/src/platforms/generic_unix/lib/mapped_file.c index f33aa183a..3a5880218 100644 --- a/src/platforms/generic_unix/lib/mapped_file.c +++ b/src/platforms/generic_unix/lib/mapped_file.c @@ -22,6 +22,7 @@ #include "utils.h" +#include #include #include #include @@ -50,9 +51,18 @@ MappedFile *mapped_file_open_beam(const char *file_name) fstat(mf->fd, &file_stats); mf->size = file_stats.st_size; - mf->mapped = mmap(NULL, mf->size, PROT_READ | PROT_EXEC, MAP_SHARED, mf->fd, 0); - if (IS_NULL_PTR(mf->mapped)) { - fprintf(stderr, "Cannot mmap %s\n", file_name); + int prot; +#ifdef AVM_NO_JIT + prot = PROT_READ; +#elif defined(__APPLE__) && defined(__arm64__) + prot = PROT_READ; +#else + prot = PROT_READ | PROT_EXEC; +#endif + + mf->mapped = mmap(NULL, mf->size, prot, MAP_SHARED, mf->fd, 0); + if (UNLIKELY(mf->mapped == MAP_FAILED)) { + fprintf(stderr, "Cannot mmap %s -- errno=%d\n", file_name, errno); close(mf->fd); free(mf); return NULL; diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 547eac534..5a9a2503e 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -47,6 +47,12 @@ #ifndef AVM_NO_JIT #include "jit_stream_mmap.h" +#include +#include + +#if defined(__APPLE__) +#include +#endif #endif #include @@ -807,3 +813,24 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) } #endif + +#ifndef AVM_NO_JIT +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ +#if defined(__APPLE__) && defined(__arm64__) + uint8_t *native_code_mmap = (uint8_t *) mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, 
MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0); + if (native_code_mmap == MAP_FAILED) { + fprintf(stderr, "Could not allocate mmap for native code"); + return NULL; + } + pthread_jit_write_protect_np(0); + memcpy(native_code_mmap, native_code, size); + pthread_jit_write_protect_np(1); + sys_icache_invalidate(native_code_mmap, size); + return (ModuleNativeEntryPoint) (native_code_mmap + offset); +#else + UNUSED(size); + return (ModuleNativeEntryPoint) (native_code + offset); +#endif +} +#endif From 27b09254e6e7588b76da3b8874a3cb8eaece63e2 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 24 Jul 2025 20:10:12 +0200 Subject: [PATCH 20/46] AArch64: enable Jit on arm64 Signed-off-by: Paul Guyot --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28b3fa1df..0073085fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,10 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) endif() +elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "aarch64") + endif() else() if (NOT AVM_DISABLE_JIT) message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") From 1612d94b31baa062c8c6346d0657f7a9a3889e65 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 24 Jul 2025 20:15:41 +0200 Subject: [PATCH 21/46] AArch64: precompile to AArch64 Signed-off-by: Paul Guyot --- libs/jit/src/jit_precompile.erl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index f358138e6..3e3e15bd4 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -65,8 +65,15 @@ compile(Target, Dir, Path) -> Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, + + Arch = + case Target of + "x86_64" -> ?JIT_ARCH_X86_64; + "aarch64" -> 
?JIT_ARCH_AARCH64 + end, + Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) + Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC) ), Backend = list_to_atom("jit_" ++ Target), Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), From e712e100df8e960416f93e94ad3dbb9aa252f6f3 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 24 Jul 2025 22:31:52 +0200 Subject: [PATCH 22/46] AArch64: add ldr with fp reg Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 16 +++--- libs/jit/src/jit_aarch64_asm.erl | 45 +++++++++++++++++ tests/libs/jit/jit_aarch64_asm_tests.erl | 59 ++++++++++++++++------ tests/libs/jit/jit_aarch64_tests.erl | 62 ++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 22 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 744c74ea4..a56184cd3 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -131,8 +131,8 @@ Reg =:= r15) ). -define(IS_FPR(Reg), - (Reg =:= d0 orelse Reg =:= d1 orelse Reg =:= d2 orelse Reg =:= d3 orelse Reg =:= d4 orelse - Reg =:= d5 orelse Reg =:= d6 orelse Reg =:= d7) + (Reg =:= v0 orelse Reg =:= v1 orelse Reg =:= v2 orelse Reg =:= v3 orelse Reg =:= v4 orelse + Reg =:= v5 orelse Reg =:= v6 orelse Reg =:= v7) ). -type stream() :: any(). @@ -192,9 +192,9 @@ -define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). -define(AVAILABLE_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]). --define(AVAILABLE_FPREGS, [d0, d1, d2, d3, d4, d5, d6, d7]). +-define(AVAILABLE_FPREGS, [v0, v1, v2, v3, v4, v5, v6, v7]). -define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). --define(PARAMETER_FPREGS, [d0, d1, d2, d3, d4, d5]). +-define(PARAMETER_FPREGS, [v0, v1, v2, v3, v4, v5]). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. 
@@ -1570,8 +1570,8 @@ move_to_native_register( } = State, {fp_reg, F} ) -> - I1 = jit_x86_64_asm_unimplemented:movq(?FP_REGS, Temp), - I2 = jit_x86_64_asm_unimplemented:movsd({F * 8, Temp}, FPReg), + I1 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), + I2 = jit_aarch64_asm:ldr_d(FPReg, {Temp, F * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, available_fpregs = AvailFT, used_regs = [FPReg | Used]}, FPReg}. @@ -1614,8 +1614,8 @@ move_to_native_register( {fp_reg, F}, RegDst ) -> - I1 = jit_x86_64_asm_unimplemented:movq(?FP_REGS, Temp), - I2 = jit_x86_64_asm_unimplemented:movsd({F * 8, Temp}, RegDst), + I1 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), + I2 = jit_aarch64_asm:ldr_d(RegDst, {Temp, F * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index f4023473f..8a6a290f8 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -31,6 +31,7 @@ and_/3, ldr/2, ldr_w/2, + ldr_d/2, ldr/3, lsl/3, lsr/3, @@ -70,8 +71,26 @@ | r13 | r14 | r15 + | r16 + | r17 + | r18 + | r19 + | r20 + | r21 + | r22 + | r23 + | r24 + | r25 + | r26 + | r27 + | r28 + | r29 + | r30 + | sp | xzr. +-type aarch64_simd_register() :: v0 | v1 | v2 | v3 | v4 | v5 | v6 | v7 | v30 | v31. + -type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al | nv. %% Emit an ADD instruction (AArch64 encoding) @@ -173,6 +192,21 @@ ldr(Xt, {Xn, Xm, lsl, Amount}) when (16#F8606800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little >>. +-spec ldr_d(aarch64_simd_register(), {aarch64_gpr_register(), integer()}) -> binary(). 
+ldr_d(Dt, {Rn, Offset}) when + is_atom(Dt), + is_atom(Rn), + is_integer(Offset), + Offset >= 0, + Offset =< 32760, + (Offset rem 8) =:= 0 +-> + DtNum = simd_reg_to_num(Dt), + RnNum = reg_to_num(Rn), + << + (16#FD400000 bor ((Offset div 8) bsl 10) bor (RnNum bsl 5) bor DtNum):32/little + >>. + %% Emit a load register (LDR) instruction for 32-bit load from memory (AArch64 encoding) %% Dst is destination register atom, Src is {BaseReg, Offset} tuple -spec ldr_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary(). @@ -694,6 +728,17 @@ reg_to_num(sp) -> 31; %% Zero register (XZR) is also r31 reg_to_num(xzr) -> 31. +simd_reg_to_num(v0) -> 0; +simd_reg_to_num(v1) -> 1; +simd_reg_to_num(v2) -> 2; +simd_reg_to_num(v3) -> 3; +simd_reg_to_num(v4) -> 4; +simd_reg_to_num(v5) -> 5; +simd_reg_to_num(v6) -> 6; +simd_reg_to_num(v7) -> 7; +simd_reg_to_num(v30) -> 30; +simd_reg_to_num(v31) -> 31. + %% Emit a conditional branch instruction -spec bcc(cc(), integer()) -> binary(). bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 15d9d1efb..5e5d783b6 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -99,6 +99,16 @@ ldr_w_test_() -> ?_assertEqual(<<16#b9406042:32/little>>, jit_aarch64_asm:ldr_w(r2, {r2, 96})) ]. +ldr_d_test_() -> + [ + ?_assertEqual( + asm(<<16#fd40001e:32/little>>, "ldr d30, [x0]"), jit_aarch64_asm:ldr_d(v30, {r0, 0}) + ), + ?_assertEqual( + asm(<<16#fd400420:32/little>>, "ldr d0, [x1, #8]"), jit_aarch64_asm:ldr_d(v0, {r1, 8}) + ) + ]. 
+ mov_test_() -> [ % mov immediate - simple cases @@ -202,8 +212,12 @@ lsl_test_() -> lsr_test_() -> [ - ?_assertEqual(asm(<<16#D340FC00:32/little>>, "lsr x0, x0, 0"), jit_aarch64_asm:lsr(r0, r0, 0)), - ?_assertEqual(asm(<<16#D340FC01:32/little>>, "lsr x1, x0, 0"), jit_aarch64_asm:lsr(r1, r0, 0)), + ?_assertEqual( + asm(<<16#D340FC00:32/little>>, "lsr x0, x0, 0"), jit_aarch64_asm:lsr(r0, r0, 0) + ), + ?_assertEqual( + asm(<<16#D340FC01:32/little>>, "lsr x1, x0, 0"), jit_aarch64_asm:lsr(r1, r0, 0) + ), ?_assertEqual(<<16#D360FC00:32/little>>, jit_aarch64_asm:lsr(r0, r0, 32)) ]. @@ -218,7 +232,9 @@ tst_test_() -> ?_assertEqual(asm(<<16#f240003f:32/little>>, "tst x1, #1"), jit_aarch64_asm:tst(r1, 1)), ?_assertEqual(asm(<<16#f27c005f:32/little>>, "tst x2, #16"), jit_aarch64_asm:tst(r2, 16)), ?_assertEqual(asm(<<16#f2401c7f:32/little>>, "tst x3, #255"), jit_aarch64_asm:tst(r3, 255)), - ?_assertEqual(asm(<<16#f240249f:32/little>>, "tst x4, #1023"), jit_aarch64_asm:tst(r4, 1023)), + ?_assertEqual( + asm(<<16#f240249f:32/little>>, "tst x4, #1023"), jit_aarch64_asm:tst(r4, 1023) + ), ?_assertEqual(asm(<<16#f24014bf:32/little>>, "tst x5, #63"), jit_aarch64_asm:tst(r5, 63)), ?_assertEqual(asm(<<16#f27b00df:32/little>>, "tst x6, #32"), jit_aarch64_asm:tst(r6, 32)), ?_assertEqual(asm(<<16#f27a00ff:32/little>>, "tst x7, #64"), jit_aarch64_asm:tst(r7, 64)), @@ -230,12 +246,18 @@ tst_w_test_() -> ?_assertEqual(asm(<<16#6a01001f:32/little>>, "tst w0, w1"), jit_aarch64_asm:tst_w(r0, r1)), ?_assertEqual(asm(<<16#7200003f:32/little>>, "tst w1, #1"), jit_aarch64_asm:tst_w(r1, 1)), ?_assertEqual(asm(<<16#721c005f:32/little>>, "tst w2, #16"), jit_aarch64_asm:tst_w(r2, 16)), - ?_assertEqual(asm(<<16#72001c7f:32/little>>, "tst w3, #255"), jit_aarch64_asm:tst_w(r3, 255)), - ?_assertEqual(asm(<<16#7200249f:32/little>>, "tst w4, #1023"), jit_aarch64_asm:tst_w(r4, 1023)), + ?_assertEqual( + asm(<<16#72001c7f:32/little>>, "tst w3, #255"), jit_aarch64_asm:tst_w(r3, 255) + ), + ?_assertEqual( + 
asm(<<16#7200249f:32/little>>, "tst w4, #1023"), jit_aarch64_asm:tst_w(r4, 1023) + ), ?_assertEqual(asm(<<16#720014bf:32/little>>, "tst w5, #63"), jit_aarch64_asm:tst_w(r5, 63)), ?_assertEqual(asm(<<16#721b00df:32/little>>, "tst w6, #32"), jit_aarch64_asm:tst_w(r6, 32)), ?_assertEqual(asm(<<16#721a00ff:32/little>>, "tst w7, #64"), jit_aarch64_asm:tst_w(r7, 64)), - ?_assertEqual(asm(<<16#721e051f:32/little>>, "tst w8, #0xc"), jit_aarch64_asm:tst_w(r8, 16#c)) + ?_assertEqual( + asm(<<16#721e051f:32/little>>, "tst w8, #0xc"), jit_aarch64_asm:tst_w(r8, 16#c) + ) ]. bcc_test_() -> @@ -310,22 +332,30 @@ adr_test_() -> asm(Bin, Str) -> case erlang:system_info(machine) of - "ATOM" -> Bin; + "ATOM" -> + Bin; "BEAM" -> case os:cmd("which aarch64-elf-as") of - [] -> Bin; + [] -> + Bin; _ -> ok = file:write_file("test.S", Str ++ "\n"), - Dump = os:cmd("aarch64-elf-as -c test.S -o test.o && aarch64-elf-objdump -D test.o"), + Dump = os:cmd( + "aarch64-elf-as -c test.S -o test.o && aarch64-elf-objdump -D test.o" + ), DumpBin = list_to_binary(Dump), DumpLines = binary:split(DumpBin, <<"\n">>, [global]), AsmBin = asm_lines(DumpLines, <<>>), if - AsmBin =:= Bin -> ok; + AsmBin =:= Bin -> + ok; true -> - io:format("-------------------------------------------\n" - "~s\n" - "-------------------------------------------\n", [Dump]) + io:format( + "-------------------------------------------\n" + "~s\n" + "-------------------------------------------\n", + [Dump] + ) end, ?assertEqual(AsmBin, Bin), Bin @@ -342,7 +372,8 @@ asm_lines([_OtherLine | T], Acc) -> asm_lines([], Acc) -> Acc. 
-hex_to_bin(<<>>, Acc) -> Acc; +hex_to_bin(<<>>, Acc) -> + Acc; hex_to_bin(HexStr, Acc) -> [HexChunk, Rest] = binary:split(HexStr, <<" ">>), NumBits = byte_size(HexChunk) * 4, diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index eb6505b6c..a99f47b31 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -838,6 +838,58 @@ move_to_native_register_test_() -> end, fun(State0) -> [ + %% move_to_native_register/2: imm + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: d2800547 mov x7, #0x2a // #42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {ptr, reg} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, r6}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r6, Reg), + Dump = << + " 0: f94000c6 ldr x6, [x6]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {x_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 3}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: f9402407 ldr x7, [x0, #72]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {y_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f9400ce7 ldr x7, [x7, #24]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {fp_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {fp_reg, 3}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(v0, Reg), + Dump = << + " 0: f9406007 ldr x7, [x0, #192]\n" + " 4: fd400ce0 ldr d0, [x7, #24]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), %% 
move_to_native_register/3: imm to reg ?_test(begin State1 = ?BACKEND:move_to_native_register(State0, 42, r8), @@ -883,6 +935,16 @@ move_to_native_register_test_() -> " 4: f9400908 ldr x8, [x8, #16]" >>, ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {fp_reg, N} + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {fp_reg, 3}, v0), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9406007 ldr x7, [x0, #192]\n" + " 4: fd400ce0 ldr d0, [x7, #24]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) end) ] end}. From c0b81a418bf4ef67bd117ac80578cc7ea9fe404a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 24 Jul 2025 23:16:32 +0200 Subject: [PATCH 23/46] AArch64: multiply Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 47 +++++++++++++-- libs/jit/src/jit_aarch64_asm.erl | 47 ++++++++++++++- tests/libs/jit/jit_aarch64_asm_tests.erl | 51 ++++++++++++++-- tests/libs/jit/jit_aarch64_tests.erl | 75 ++++++++++++++++++++++++ 4 files changed, 210 insertions(+), 10 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index a56184cd3..154e328cb 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1741,12 +1741,12 @@ or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> State#state{stream = Stream1}. add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm_unimplemented:addq(Val, Reg), + I1 = jit_aarch64_asm:add(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm_unimplemented:subq(Val, Reg), + I1 = jit_aarch64_asm:sub(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. 
@@ -1754,19 +1754,56 @@ mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); +mul(#state{available_regs = [Temp | _]} = State, Reg, 3) -> + I1 = jit_aarch64_asm:lsl(Temp, Reg, 1), + I2 = jit_aarch64_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> + I1 = jit_aarch64_asm:lsl(Temp, Reg, 2), + I2 = jit_aarch64_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State0, Reg, 6) -> + State1 = mul(State0, Reg, 3), + mul(State1, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 7) -> + I1 = jit_aarch64_asm:lsl(Temp, Reg, 3), + I2 = jit_aarch64_asm:sub(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); +mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> + I1 = jit_aarch64_asm:lsl(Temp, Reg, 3), + I2 = jit_aarch64_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State0, Reg, 10) -> + State1 = mul(State0, Reg, 5), + mul(State1, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 15) -> + I1 = jit_aarch64_asm:lsl(Temp, Reg, 4), + I2 = jit_aarch64_asm:sub(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; mul(State, Reg, 16) -> shift_left(State, Reg, 4); mul(State, Reg, 32) -> shift_left(State, Reg, 5); mul(State, Reg, 64) -> shift_left(State, Reg, 6); -mul(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_x86_64_asm_unimplemented:imulq(Val, Reg), - Stream1 = StreamModule:append(Stream0, I1), +mul( + #state{stream_module = StreamModule, 
stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Val +) -> + % multiply by decomposing by power of 2 + I1 = jit_aarch64_asm:mov(Temp, Val), + I2 = jit_aarch64_asm:mul(Reg, Reg, Temp), + Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}. -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 8a6a290f8..1b07d2b66 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -21,6 +21,10 @@ -export([ add/3, add/4, + sub/3, + sub/4, + mul/3, + madd/4, b/1, bcc/2, blr/1, @@ -101,7 +105,9 @@ add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm = RnNum = reg_to_num(Rn), %% AArch64 ADD (immediate) encoding: 1001000100iiiiiiiiiiiinnnnndddddd %% 0x91000000 | Imm << 10 | Rn << 5 | Rd - <<(16#91000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>. + <<(16#91000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>; +add(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> + add(Rd, Rn, Rm, {lsl, 0}). %% ADD (shifted register) %% ADD Rd, Rn, Rm, {lsl, #amount} @@ -918,6 +924,28 @@ subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> %% AArch64 SUBS (register): 11101011000mmmmm000000nnnnndddddd <<(16#EB000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little>>. +-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) -> + binary(). +sub(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + <<(16#D1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>; +sub(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> + sub(Rd, Rn, Rm, {lsl, 0}). + +-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), {lsl, 0..63}) -> + binary(). 
+sub(Rd, Rn, Rm, {lsl, Amount}) when + is_atom(Rd), is_atom(Rn), is_atom(Rm), is_integer(Amount), Amount >= 0, Amount =< 63 +-> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + << + (16#CB000000 bor (RmNum bsl 16) bor ((Amount band 16#3F) bsl 10) bor (RnNum bsl 5) bor + RdNum):32/little + >>. + %% Emit an ADR (PC-relative address) instruction (AArch64 encoding) %% Dst is destination register atom, Offset is signed immediate (in bytes, -1MB..+1MB) -spec adr(aarch64_gpr_register(), integer()) -> binary(). @@ -927,3 +955,20 @@ adr(Dst, Imm) when is_atom(Dst), is_integer(Imm), Imm >= -1048576, Imm =< 104857 ImmHi = Imm bsr 2, Word = (16#10000000) bor (ImmLo bsl 29) bor ((ImmHi band 16#7FFFF) bsl 5) bor DstNum, <>. + +-spec mul(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register()) -> binary(). +mul(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> + madd(Rd, Rn, Rm, xzr). + +-spec madd( + aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() +) -> binary(). +madd(Rd, Rn, Rm, Ra) when is_atom(Rd), is_atom(Rn), is_atom(Rm), is_atom(Ra) -> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + RaNum = reg_to_num(Ra), + << + (16#9B000000 bor (RmNum bsl 16) bor (RaNum bsl 10) bor (RnNum bsl 5) bor + RdNum):32/little + >>. 
diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 5e5d783b6..d30054f6a 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -26,10 +26,53 @@ add_test_() -> [ - ?_assertEqual(<<16#9100e0e7:32/little>>, jit_aarch64_asm:add(r7, r7, 56)), - ?_assertEqual(<<16#91000000:32/little>>, jit_aarch64_asm:add(r0, r0, 0)), - ?_assertEqual(<<16#91000421:32/little>>, jit_aarch64_asm:add(r1, r1, 1)), - ?_assertEqual(<<16#8b031041:32/little>>, jit_aarch64_asm:add(r1, r2, r3, {lsl, 4})) + ?_assertEqual( + asm(<<16#9100e0e7:32/little>>, "add x7, x7, #56"), jit_aarch64_asm:add(r7, r7, 56) + ), + ?_assertEqual( + asm(<<16#91000000:32/little>>, "add x0, x0, #0"), jit_aarch64_asm:add(r0, r0, 0) + ), + ?_assertEqual( + asm(<<16#91000421:32/little>>, "add x1, x1, #1"), jit_aarch64_asm:add(r1, r1, 1) + ), + ?_assertEqual( + asm(<<16#8b031041:32/little>>, "add x1, x2, x3, lsl #4"), + jit_aarch64_asm:add(r1, r2, r3, {lsl, 4}) + ), + ?_assertEqual( + asm(<<16#8b030041:32/little>>, "add x1, x2, x3"), jit_aarch64_asm:add(r1, r2, r3) + ) + ]. + +sub_test_() -> + [ + ?_assertEqual( + asm(<<16#d100e0e7:32/little>>, "sub x7, x7, #56"), jit_aarch64_asm:sub(r7, r7, 56) + ), + ?_assertEqual( + asm(<<16#d1000000:32/little>>, "sub x0, x0, #0"), jit_aarch64_asm:sub(r0, r0, 0) + ), + ?_assertEqual( + asm(<<16#d1000421:32/little>>, "sub x1, x1, #1"), jit_aarch64_asm:sub(r1, r1, 1) + ), + ?_assertEqual( + asm(<<16#cb031041:32/little>>, "sub x1, x2, x3, lsl #4"), + jit_aarch64_asm:sub(r1, r2, r3, {lsl, 4}) + ), + ?_assertEqual( + asm(<<16#cb030041:32/little>>, "sub x1, x2, x3"), jit_aarch64_asm:sub(r1, r2, r3) + ) + ]. + +madd_test_() -> + [ + ?_assertEqual( + asm(<<16#9b037c41:32/little>>, "mul x1, x2, x3"), jit_aarch64_asm:mul(r1, r2, r3) + ), + ?_assertEqual( + asm(<<16#9b031041:32/little>>, "madd x1, x2, x3, x4"), + jit_aarch64_asm:madd(r1, r2, r3, r4) + ) ]. 
b_test_() -> diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index a99f47b31..d901ab027 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -948,6 +948,81 @@ move_to_native_register_test_() -> end) ] end}. + +mul_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:mul(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +mul_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + mul_test0(State0, r2, 2, << + "0: d37ff842 lsl x2, x2, #1" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 3, << + " 0: d37ff847 lsl x7, x2, #1\n" + " 4: 8b0200e2 add x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 4, << + "0: d37ef442 lsl x2, x2, #2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 5, << + " 0: d37ef447 lsl x7, x2, #2\n" + " 4: 8b0200e2 add x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 6, << + " 0: d37ff847 lsl x7, x2, #1\n" + " 4: 8b0200e2 add x2, x7, x2\n" + " 8: d37ff842 lsl x2, x2, #1" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 7, << + " 0: d37df047 lsl x7, x2, #3\n" + " 4: cb0200e2 sub x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 8, << + "0: d37df042 lsl x2, x2, #3" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 9, << + " 0: d37df047 lsl x7, x2, #3\n" + " 4: 8b0200e2 add x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 10, << + " 0: d37ef447 lsl x7, x2, #2\n" + " 4: 8b0200e2 add x2, x7, x2\n" + " 8: d37ff842 lsl x2, x2, #1" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 11, << + " 0: d2800167 mov x7, #0xb // #11\n" + " 4: 9b077c42 mul x2, x2, x7" + >>) + end) + ] + end}. + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). 
From 731a41553f2e63862572a7e90fb6ab515b31e900 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 24 Jul 2025 23:40:43 +0200 Subject: [PATCH 24/46] AArch64: set_bs, rewrite_cp_offset Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 40 ++++++++++++++++++++------------ libs/jit/src/jit_aarch64_asm.erl | 7 ++++++ 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 154e328cb..4ae733875 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1840,9 +1840,9 @@ decrement_reductions_and_maybe_schedule_next( -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> - {State1, RewriteOffset} = set_cp(State0), + {State1, RewriteOffset, RewriteSize} = set_cp(State0), State2 = call_only_or_schedule_next(State1, Label), - rewrite_cp_offset(State2, RewriteOffset). + rewrite_cp_offset(State2, RewriteOffset, RewriteSize). call_only_or_schedule_next( #state{ @@ -1870,11 +1870,11 @@ call_only_or_schedule_next( call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). call_primitive_with_cp(State0, Primitive, Args) -> - {State1, RewriteOffset} = set_cp(State0), + {State1, RewriteOffset, RewriteSize} = set_cp(State0), State2 = call_primitive_last(State1, Primitive, Args), - rewrite_cp_offset(State2, RewriteOffset). + rewrite_cp_offset(State2, RewriteOffset, RewriteSize). --spec set_cp(state()) -> {state(), non_neg_integer()}. +-spec set_cp(state()) -> {state(), non_neg_integer(), 4 | 8}. 
set_cp(State0) -> % get module index (dynamically) {#state{stream_module = StreamModule, stream = Stream0} = State1, Reg} = get_module_index( @@ -1883,30 +1883,40 @@ set_cp(State0) -> Offset = StreamModule:offset(Stream0), % build cp with module_index << 24 I1 = jit_aarch64_asm:lsl(Reg, Reg, 24), - I2 = jit_aarch64_asm:mov(?IP0_REG, 0), + if + Offset >= 16250 -> + I2 = jit_aarch64_asm:nop(), + I3 = jit_aarch64_asm:nop(), + RewriteSize = 8; + true -> + I2 = jit_aarch64_asm:nop(), + I3 = <<>>, + RewriteSize = 4 + end, MOVOffset = Offset + byte_size(I1), - I3 = jit_aarch64_asm:orr(Reg, Reg, ?IP0_REG), - I4 = jit_aarch64_asm:str(Reg, ?CP), - Code = <>, + I4 = jit_aarch64_asm:orr(Reg, Reg, ?IP0_REG), + I5 = jit_aarch64_asm:str(Reg, ?CP), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State2 = State1#state{stream = Stream1}, State3 = free_native_register(State2, Reg), - {State3, MOVOffset}. + {State3, MOVOffset, RewriteSize}. --spec rewrite_cp_offset(state(), non_neg_integer()) -> state(). +-spec rewrite_cp_offset(state(), non_neg_integer(), 4 | 8) -> state(). rewrite_cp_offset( #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0, - RewriteOffset + RewriteOffset, + _RewriteSize ) -> NewOffset = StreamModule:offset(Stream0) - CodeOffset, NewMoveInstr = jit_aarch64_asm:mov(?IP0_REG, NewOffset bsl 2), - ?ASSERT(byte_size(NewMoveInstr) =:= 4), + ?ASSERT(byte_size(NewMoveInstr) =< _RewriteSize), Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), State0#state{stream = Stream1}. set_bs(#state{stream_module = StreamModule, stream = Stream0} = State0, TermReg) -> - I1 = jit_x86_64_asm_unimplemented:movq(TermReg, ?BS), - I2 = jit_x86_64_asm_unimplemented:movq(0, ?BS_OFFSET), + I1 = jit_aarch64_asm:str(TermReg, ?BS), + I2 = jit_aarch64_asm:str(xzr, ?BS_OFFSET), Stream1 = StreamModule:append(Stream0, <>), State0#state{stream = Stream1}. 
diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 1b07d2b66..c9fe76024 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -44,6 +44,7 @@ movz/3, orr/3, ret/0, + nop/0, str/2, str/3, tst/2, @@ -872,6 +873,12 @@ ret() -> %% 11010110010111110000001111000000 <<16#D65F03C0:32/little>>. +%% Emit a NOP instruction +-spec nop() -> binary(). +nop() -> + %% 11010101000000110010000000011111 + <<16#d503201f:32/little>>. + %% Emit a test instruction (bitwise AND, discarding result) -spec tst(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> From a9f869890d2e82eae1404f94ba3d5d620a1a6dd6 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 18 Aug 2025 20:00:52 +0200 Subject: [PATCH 25/46] AArch64: run CI tests on macOS Signed-off-by: Paul Guyot --- .github/workflows/build-and-test-macos.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build-and-test-macos.yaml b/.github/workflows/build-and-test-macos.yaml index 72ac304f9..d057c4a43 100644 --- a/.github/workflows/build-and-test-macos.yaml +++ b/.github/workflows/build-and-test-macos.yaml @@ -47,6 +47,14 @@ jobs: otp: "28" cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + - os: "macos-14" + otp: "28" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + + - os: "macos-15" + otp: "28" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + steps: # Setup - name: "Checkout repo" From 580f0477e85cfb40d07b27b91966c1d4198f8f9a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 19 Aug 2025 00:19:18 +0200 Subject: [PATCH 26/46] AArch64: implement missing functions and workaround for 64 bits integers Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 105 ++++++++++------ libs/jit/src/jit_aarch64_asm.erl | 115 ++++++++---------- .../erlang_tests/test_add_avm_pack_binary.erl | 7 ++ tests/erlang_tests/test_add_avm_pack_file.erl | 14 ++- tests/erlang_tests/test_close_avm_pack.erl | 12 
++ tests/erlang_tests/test_code_load_abs.erl | 13 +- tests/erlang_tests/test_code_load_binary.erl | 7 ++ tests/libs/jit/jit_aarch64_asm_tests.erl | 2 + tests/libs/jit/jit_aarch64_tests.erl | 22 ++-- tests/test.c | 8 +- 10 files changed, 185 insertions(+), 120 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 4ae733875..b6e565b7f 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -115,14 +115,14 @@ | r13 | r14 | r15 - | d0 - | d1 - | d2 - | d3 - | d4 - | d5 - | d6 - | d7. + | v0 + | v1 + | v2 + | v3 + | v4 + | v5 + | v6 + | v7. -define(IS_GPR(Reg), (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse @@ -488,7 +488,7 @@ return_if_not_equal_to_ctx( _ -> jit_aarch64_asm:orr(r0, xzr, Reg) end, I4 = jit_aarch64_asm:ret(), - I2 = jit_aarch64_asm:bcc(eq, byte_size(I3) + byte_size(I4)), + I2 = jit_aarch64_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg @@ -823,18 +823,25 @@ if_block_cond( {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> % AND with mask - I1 = jit_aarch64_asm:and_(Temp, Reg, Mask), + OffsetBefore = StreamModule:offset(Stream0), + Stream1 = + try + I = jit_aarch64_asm:and_(Temp, Reg, Mask), + StreamModule:append(Stream0, I) + catch + error:{unencodable_immediate, Val} -> + MoveI = jit_aarch64_asm:mov(Temp, Mask), + AndI = jit_aarch64_asm:and_(Temp, Reg, Temp), + StreamModule:append(Stream0, <>) + end, % Compare with value I2 = jit_aarch64_asm:cmp(Temp, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), I3 = jit_aarch64_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary, - I3/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1}, - {State1, eq, byte_size(I1) + byte_size(I2)}; + Stream3 = 
StreamModule:append(Stream2, I3), + State1 = State0#state{stream = Stream3}, + {State1, eq, OffsetAfter - OffsetBefore}; if_block_cond( #state{ stream_module = StreamModule, @@ -843,19 +850,18 @@ if_block_cond( {{free, Reg} = RegTuple, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> % AND with mask - I1 = jit_aarch64_asm:and_(Reg, Reg, Mask), + OffsetBefore = StreamModule:offset(Stream0), + State1 = and_(State0, Reg, Mask), + Stream1 = State1#state.stream, % Compare with value I2 = jit_aarch64_asm:cmp(Reg, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), I3 = jit_aarch64_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary, - I3/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = if_block_free_reg(RegTuple, State0), - State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(I1) + byte_size(I2)}; + Stream3 = StreamModule:append(Stream2, I3), + State3 = State1#state{stream = Stream3}, + State4 = if_block_free_reg(RegTuple, State3), + {State4, eq, OffsetAfter - OffsetBefore}; if_block_cond( #state{ stream_module = StreamModule, @@ -1023,7 +1029,8 @@ call_func_ptr( Stream4 = StreamModule:append(Stream3, Call), % If r0 is in used regs, save it to another temporary register - AvailableRegs1 = FreeRegs ++ AvailableRegs0, + FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), + AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, {Stream5, ResultReg} = case lists:member(r0, SavedRegs) of true -> @@ -1490,8 +1497,8 @@ move_to_array_element( IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_aarch64_asm:add(Temp, BaseReg, Offset), - I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, IndexReg, lsl, 3}), + I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset), + I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( @@ -1503,8 +1510,8 @@ 
move_to_array_element( ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), [Temp | _] = State1#state.available_regs, - I1 = jit_aarch64_asm:add(Temp, BaseReg, Offset), - I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, IndexReg, lsl, 3}), + I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset), + I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}), Stream1 = (State1#state.stream_module):append(State1#state.stream, <>), State2 = State1#state{stream = Stream1}, free_native_register(State2, ValueReg). @@ -1697,9 +1704,9 @@ set_continuation_to_offset( ) -> OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), - {RewriteLEAOffset, I1} = jit_x86_64_asm_unimplemented:leaq_rel32({-4, rip}, Temp), - Reloc = {OffsetRef, Offset + RewriteLEAOffset, 32}, - I2 = jit_x86_64_asm_unimplemented:movq(Temp, ?JITSTATE_CONTINUATION), + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {OffsetRef, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. @@ -1722,8 +1729,17 @@ get_module_index( }. and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_aarch64_asm:and_(Reg, Reg, Val), - Stream1 = StreamModule:append(Stream0, I1), + Stream1 = + try + I = jit_aarch64_asm:and_(Reg, Reg, Val), + StreamModule:append(Stream0, I) + catch + error:{unencodable_immediate, Val} -> + [Temp | _] = State#state.available_regs, + I1 = jit_aarch64_asm:mov(Temp, Val), + I2 = jit_aarch64_asm:and_(Reg, Reg, Temp), + StreamModule:append(Stream0, <>) + end, State#state{stream = Stream1}. or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> @@ -1741,8 +1757,17 @@ or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> State#state{stream = Stream1}. 
add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_aarch64_asm:add(Reg, Reg, Val), - Stream1 = StreamModule:append(Stream0, I1), + Stream1 = + try + I = jit_aarch64_asm:add(Reg, Reg, Val), + StreamModule:append(Stream0, I) + catch + error:{unencodable_immediate, Val} -> + [Temp | _] = State#state.available_regs, + I1 = jit_aarch64_asm:mov(Temp, Val), + I2 = jit_aarch64_asm:add(Reg, Reg, Temp), + StreamModule:append(Stream0, <>) + end, State#state{stream = Stream1}. sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index c9fe76024..548128318 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -107,6 +107,8 @@ add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm = %% AArch64 ADD (immediate) encoding: 1001000100iiiiiiiiiiiinnnnndddddd %% 0x91000000 | Imm << 10 | Rn << 5 | Rd <<(16#91000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>; +add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> + error({unencodable_immediate, Imm}); add(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> add(Rd, Rn, Rm, {lsl, 0}). @@ -133,14 +135,15 @@ add(Rd, Rn, Rm, {lsl, Amount}) when b(Offset) when is_integer(Offset) -> %% AArch64 B encoding: 0b000101 | imm26 | 00000 %% imm26 is (Offset / 4) signed, fits in 26 bits - <<(16#14000000 bor (Offset div 4)):32/little>>. + Offset26 = Offset div 4, + <<(16#14000000 bor (Offset26 band 16#3FFFFFF)):32/little>>. %% Emit a breakpoint (BRK) instruction with immediate (AArch64 encoding) %% imm is a 16-bit immediate value (usually 0 for debuggers) -spec brk(integer()) -> binary(). 
brk(Imm) when is_integer(Imm), Imm >= 0, Imm =< 16#FFFF -> %% AArch64 BRK encoding: 11010100 00100000 00000000 iiiiiiii iiiiiiii - %% 0xd4200000 | (Imm << 5) + %% 0xd4200000 | Imm << 5 <<(16#D4200000 bor ((Imm band 16#FFFF) bsl 5)):32/little>>. %% Emit a branch with link register (BLR) instruction (AArch64 encoding) @@ -253,20 +256,20 @@ mov_immediate(Dst, Imm) when Imm < 0, Imm >= -16#FFFF -> <<(16#92800000 bor (((-Imm - 1) band 16#FFFF) bsl 5) bor DstNum):32/little>>; mov_immediate(Dst, Imm) when Imm >= 0 -> %% Complex positive immediate - build with MOVZ + MOVK sequence - build_positive_immediate(Dst, Imm); + build_positive_immediate(Dst, <>); mov_immediate(Dst, Imm) when Imm < 0 -> %% Complex negative immediate - try MOVN approach first - build_negative_immediate(Dst, Imm). + build_negative_immediate(Dst, <>). %% Build positive immediate using MOVZ + MOVK sequence --spec build_positive_immediate(aarch64_gpr_register(), integer()) -> binary(). -build_positive_immediate(Dst, Imm) -> +-spec build_positive_immediate(aarch64_gpr_register(), binary()) -> binary(). +build_positive_immediate(Dst, <> = ImmB) -> %% First try simple MOVZ/MOVK sequence for values with few non-zero chunks Chunks = [ - Imm band 16#FFFF, - (Imm bsr 16) band 16#FFFF, - (Imm bsr 32) band 16#FFFF, - (Imm bsr 48) band 16#FFFF + Imm1, + Imm2, + Imm3, + Imm4 ], NonZeroChunks = length([C || C <- Chunks, C =/= 0]), @@ -276,7 +279,7 @@ build_positive_immediate(Dst, Imm) -> build_immediate_sequence(Dst, Chunks); true -> %% For complex values, try bitmask immediate first - case encode_bitmask_immediate(Imm) of + case encode_bitmask_immediate(ImmB) of {ok, N, Immr, Imms} -> %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm) orr_immediate(Dst, N, Immr, Imms); @@ -287,16 +290,16 @@ build_positive_immediate(Dst, Imm) -> end. %% Build negative immediate using MOVN or fallback to positive approach --spec build_negative_immediate(aarch64_gpr_register(), integer()) -> binary(). 
-build_negative_immediate(Dst, Imm) -> +-spec build_negative_immediate(aarch64_gpr_register(), binary()) -> binary(). +build_negative_immediate(Dst, ImmB) -> %% First try to encode as bitmask immediate with ORR - case encode_bitmask_immediate(Imm) of + case encode_bitmask_immediate(ImmB) of {ok, N, Immr, Imms} -> %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm) orr_immediate(Dst, N, Immr, Imms); error -> %% Fallback to multi-instruction sequence - build_positive_immediate(Dst, Imm band 16#FFFFFFFFFFFFFFFF) + build_positive_immediate(Dst, ImmB) end. %% Build instruction sequence from chunks @@ -396,31 +399,25 @@ orr_immediate(Dst, N, Immr, Imms) when %% Encode a value as AArch64 bitmask immediate %% Returns {ok, N, Immr, Imms} if encodable, error otherwise --spec encode_bitmask_immediate(integer()) -> {ok, 0..1, integer(), integer()} | error. -encode_bitmask_immediate(Value) when is_integer(Value) -> - %% Convert to 64-bit unsigned - UnsignedValue = Value band 16#FFFFFFFFFFFFFFFF, - +-spec encode_bitmask_immediate(binary()) -> {ok, 0..1, integer(), integer()} | error. +encode_bitmask_immediate(Value) when byte_size(Value) =:= 8 -> %% Try different pattern sizes (64, 32, 16, 8, 4, 2) PatternSizes = [64, 32, 16, 8, 4, 2], - try_pattern_sizes(UnsignedValue, PatternSizes). + try_pattern_sizes(Value, PatternSizes). %% Encode a value as AArch64 bitmask immediate for 32 bits values %% Returns {ok, Immr, Imms} if encodable, error otherwise --spec encode_bitmask_immediate_w(integer()) -> {ok, integer(), integer()} | error. -encode_bitmask_immediate_w(Value) when is_integer(Value) -> - %% Convert to 64-bit unsigned - UnsignedValue = Value band 16#FFFFFFFFFFFFFFFF, - +-spec encode_bitmask_immediate_w(binary()) -> {ok, integer(), integer()} | error. 
+encode_bitmask_immediate_w(Value) when byte_size(Value) =:= 4 -> %% Try different pattern sizes (32, 16, 8, 4, 2) PatternSizes = [32, 16, 8, 4, 2], - case try_pattern_sizes(UnsignedValue, PatternSizes) of + case try_pattern_sizes(Value, PatternSizes) of {ok, 0, Immr, Imms} -> {ok, Immr, Imms}; error -> error end. %% Try encoding with different pattern sizes --spec try_pattern_sizes(integer(), [integer()]) -> {ok, integer(), integer(), integer()} | error. +-spec try_pattern_sizes(binary(), [integer()]) -> {ok, integer(), integer(), integer()} | error. try_pattern_sizes(_, []) -> error; try_pattern_sizes(Value, [Size | Rest]) -> @@ -430,37 +427,17 @@ try_pattern_sizes(Value, [Size | Rest]) -> end. %% Try to encode value with a specific pattern size --spec try_encode_pattern_size(integer(), integer()) -> +-spec try_encode_pattern_size(binary(), integer()) -> {ok, integer(), integer(), integer()} | error. try_encode_pattern_size(Value, Size) -> - %% Extract the pattern of the given size - Mask = (1 bsl Size) - 1, - Pattern = Value band Mask, - - %% Check if the value is just this pattern repeated - case is_repeating_pattern(Value, Pattern, Size) of - true -> try_encode_single_pattern(Pattern, Size); - false -> error + <> = Value, + if + Value =:= <> -> + try_encode_single_pattern(Pattern, Size); + true -> + error end. -%% Check if value consists of pattern repeated --spec is_repeating_pattern(integer(), integer(), integer()) -> boolean(). -is_repeating_pattern(Value, Pattern, Size) -> - is_repeating_pattern(Value, Pattern, Size, 0). - -is_repeating_pattern(0, 0, _, _) -> - true; -is_repeating_pattern(Value, Pattern, Size, Pos) when Pos < 64 -> - Mask = (1 bsl Size) - 1, - CurrentPattern = (Value bsr Pos) band Mask, - case CurrentPattern of - Pattern when (Value bsr (Pos + Size)) =:= 0 -> true; - Pattern -> is_repeating_pattern(Value, Pattern, Size, Pos + Size); - _ -> false - end; -is_repeating_pattern(_, _, _, _) -> - false. 
- %% Try to encode a single pattern as bitmask immediate -spec try_encode_single_pattern(integer(), integer()) -> {ok, integer(), integer(), integer()} | error. @@ -551,7 +528,7 @@ orr(DstReg, Rn, Rm) when is_atom(DstReg), is_atom(Rn), is_atom(Rm) -> orr(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), - case encode_bitmask_immediate(Imm) of + case encode_bitmask_immediate(<>) of {ok, N, Immr, Imms} -> % OR immediate encoding: sf=1(64b) 01(op) 100100 N immr imms Rn Rd Opcode = 16#B2000000, @@ -784,7 +761,8 @@ bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> % Never nv -> 15 end, - <<(16#54000000 bor ((Offset div 4) bsl 5) bor CondNum):32/little>>. + Offset19 = Offset div 4, + <<(16#54000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor CondNum):32/little>>. %% Emit a compare instruction -spec cmp(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). @@ -798,7 +776,14 @@ cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> RnNum = reg_to_num(Rn), %% AArch64 CMP (immediate) encoding: CMP Rn, #imm %% This is SUBS XZR, Rn, #imm: 1111000100iiiiiiiiiiiinnnnn11111 - <<(16#F100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>. + <<(16#F100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>; +cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> + %% For large immediates, load into a temporary register and compare + %% Use r16 as temporary register (caller-saved) + TempReg = r16, + LoadInstr = build_positive_immediate(TempReg, <>), + CmpInstr = cmp(Rn, TempReg), + <>. %% Emit a 32-bit compare instruction -spec cmp32(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). 
@@ -812,7 +797,13 @@ cmp32(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> RnNum = reg_to_num(Rn), %% AArch64 CMP (32-bit immediate) encoding: CMP Wn, #imm %% This is SUBS WZR, Wn, #imm: 0111000100iiiiiiiiiiiinnnnn11111 - <<(16#7100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>. + <<(16#7100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>; +cmp32(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm < 0, Imm >= -4095 -> + RnNum = reg_to_num(Rn), + %% For negative immediates, use ADD form: CMP Wn, #(-imm) becomes ADDS WZR, Wn, #(-imm) + %% AArch64 ADDS (32-bit immediate) encoding: 0011000100iiiiiiiiiiiinnnnn11111 + PosImm = -Imm, + <<(16#3100001F bor ((PosImm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>. %% Emit an AND instruction (bitwise AND) -spec and_(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> @@ -829,7 +820,7 @@ and_(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> and_(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), - case encode_bitmask_immediate(Imm) of + case encode_bitmask_immediate(<>) of {ok, N, Immr, Imms} -> % AND immediate encoding: sf=1(64b) 00(op) 100100 N immr imms Rn Rd Opcode = 16#92000000, @@ -889,7 +880,7 @@ tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> <<(16#EA00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; tst(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> RnNum = reg_to_num(Rn), - case encode_bitmask_immediate(Imm) of + case encode_bitmask_immediate(<>) of {ok, N, Immr, Imms} -> << (16#F200001F bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little @@ -908,7 +899,7 @@ tst_w(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> <<(16#6A00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; tst_w(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> RnNum = reg_to_num(Rn), - case encode_bitmask_immediate_w(Imm) of + case encode_bitmask_immediate_w(<>) of 
{ok, Immr, Imms} -> <<(16#7200001F bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little>>; _ -> diff --git a/tests/erlang_tests/test_add_avm_pack_binary.erl b/tests/erlang_tests/test_add_avm_pack_binary.erl index 807b4a955..c4767a2c6 100644 --- a/tests/erlang_tests/test_add_avm_pack_binary.erl +++ b/tests/erlang_tests/test_add_avm_pack_binary.erl @@ -28,11 +28,18 @@ load_pack_data() -> ?CODE_LOAD_PACK_DATA. -else. +-if(?AVM_JIT_TARGET_ARCH == aarch64). +-include("code_load/code_load_pack_data_aarch64.hrl"). + +load_pack_data() -> + ?CODE_LOAD_PACK_DATA_aarch64. +-elif(?AVM_JIT_TARGET_ARCH == x86_64). -include("code_load/code_load_pack_data_x86_64.hrl"). load_pack_data() -> ?CODE_LOAD_PACK_DATA_x86_64. -endif. +-endif. start() -> Bin = load_pack_data(), diff --git a/tests/erlang_tests/test_add_avm_pack_file.erl b/tests/erlang_tests/test_add_avm_pack_file.erl index 53551c3e3..5533c2ff5 100644 --- a/tests/erlang_tests/test_add_avm_pack_file.erl +++ b/tests/erlang_tests/test_add_avm_pack_file.erl @@ -22,11 +22,15 @@ -export([start/0]). +-ifdef(AVM_DISABLE_JIT). +path() -> + "code_load/code_load_pack.avm". +-else. +path() -> + "../code_load/code_load_pack-" ++ atom_to_list(?AVM_JIT_TARGET_ARCH) ++ ".avm". +-endif. + start() -> - AVM = - case erlang:system_info(emu_flavor) of - emu -> "code_load/code_load_pack.avm"; - jit -> "../code_load/code_load_pack-x86_64.avm" - end, + AVM = path(), erlang:display(atomvm:add_avm_pack_file(AVM, [])), export_test_module:exported_func(4). diff --git a/tests/erlang_tests/test_close_avm_pack.erl b/tests/erlang_tests/test_close_avm_pack.erl index f89feccb0..32de96c33 100644 --- a/tests/erlang_tests/test_close_avm_pack.erl +++ b/tests/erlang_tests/test_close_avm_pack.erl @@ -28,11 +28,23 @@ load_pack_data() -> ?CODE_LOAD_PACK_DATA. -else. +-if(?AVM_JIT_TARGET_ARCH == aarch64). +-include("code_load/code_load_pack_data_aarch64.hrl"). + +load_pack_data() -> + ?CODE_LOAD_PACK_DATA_aarch64. +-elif(?AVM_JIT_TARGET_ARCH == x86_64). 
+-include("code_load/code_load_pack_data_x86_64.hrl"). + +load_pack_data() -> + ?CODE_LOAD_PACK_DATA_x86_64. +-else. -include("code_load/code_load_pack_data_x86_64.hrl"). load_pack_data() -> ?CODE_LOAD_PACK_DATA_x86_64. -endif. +-endif. start() -> Bin = load_pack_data(), diff --git a/tests/erlang_tests/test_code_load_abs.erl b/tests/erlang_tests/test_code_load_abs.erl index a421ac598..a3292fe4c 100644 --- a/tests/erlang_tests/test_code_load_abs.erl +++ b/tests/erlang_tests/test_code_load_abs.erl @@ -22,14 +22,19 @@ -export([start/0]). +-ifdef(AVM_DISABLE_JIT). +path() -> + "code_load/export_test_module". +-else. +path() -> + "../code_load/" ++ atom_to_list(?AVM_JIT_TARGET_ARCH) ++ "/export_test_module". +-endif. + start() -> Path = case erlang:system_info(machine) of "ATOM" -> - case erlang:system_info(emu_flavor) of - emu -> "code_load/export_test_module"; - jit -> "../code_load/x86_64/export_test_module" - end; + path(); "BEAM" -> "code_load/export_test_module" end, diff --git a/tests/erlang_tests/test_code_load_binary.erl b/tests/erlang_tests/test_code_load_binary.erl index 758e0ac69..1ac580c01 100644 --- a/tests/erlang_tests/test_code_load_binary.erl +++ b/tests/erlang_tests/test_code_load_binary.erl @@ -28,11 +28,18 @@ export_test_module_data() -> ?EXPORT_TEST_MODULE_DATA. -else. +-if(?AVM_JIT_TARGET_ARCH == aarch64). +-include("code_load/export_test_module_data_aarch64.hrl"). + +export_test_module_data() -> + ?EXPORT_TEST_MODULE_DATA_aarch64. +-elif(?AVM_JIT_TARGET_ARCH == x86_64). -include("code_load/export_test_module_data_x86_64.hrl"). export_test_module_data() -> ?EXPORT_TEST_MODULE_DATA_x86_64. -endif. +-endif. 
start() -> Bin = export_test_module_data(), diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index d30054f6a..21a82ffa1 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -79,6 +79,7 @@ b_test_() -> [ ?_assertEqual(<<16#14000000:32/little>>, jit_aarch64_asm:b(0)), ?_assertEqual(<<16#14000004:32/little>>, jit_aarch64_asm:b(16)), + ?_assertEqual(<<16#17fffff0:32/little>>, jit_aarch64_asm:b(-64)), ?_assertEqual(<<16#14000001:32/little>>, jit_aarch64_asm:b(4)) ]. @@ -307,6 +308,7 @@ bcc_test_() -> [ ?_assertEqual(<<16#54000000:32/little>>, jit_aarch64_asm:bcc(eq, 0)), ?_assertEqual(<<16#54000001:32/little>>, jit_aarch64_asm:bcc(ne, 0)), + ?_assertEqual(<<16#54fffe01:32/little>>, jit_aarch64_asm:bcc(ne, -64)), ?_assertEqual(<<16#54000400:32/little>>, jit_aarch64_asm:bcc(eq, 128)) ]. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index d901ab027..977884c96 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -808,23 +808,29 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9, 1), - Stream = ?BACKEND:stream(State1), + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), + State2 = setelement(8, State1, [r8, r9]), + [r8, r9] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), + Stream = ?BACKEND:stream(State3), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 91000508 add x8, x8, #0x1\n" - " 8: f8297907 str x7, [x8, x9, lsl #3]" + " 4: 9100052a add x10, x9, #0x1\n" + " 8: f82a7907 str x7, [x8, x10, lsl #3]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = ?BACKEND:move_to_array_element(State0, 42, r8, r9, 1), - Stream = 
?BACKEND:stream(State1), + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), + State2 = setelement(8, State1, [r8, r9]), + [r8, r9] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), + Stream = ?BACKEND:stream(State3), Dump = << " 0: d2800547 mov x7, #0x2a // #42\n" - " 4: 91000508 add x8, x8, #0x1\n" - " 8: f8297907 str x7, [x8, x9, lsl #3]" + " 4: 9100052a add x10, x9, #0x1\n" + " 8: f82a7907 str x7, [x8, x10, lsl #3]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) diff --git a/tests/test.c b/tests/test.c index 51977c1d2..a48ad69ca 100644 --- a/tests/test.c +++ b/tests/test.c @@ -32,6 +32,7 @@ #include "bif.h" #include "context.h" #include "iff.h" +#include "jit.h" #include "mapped_file.h" #include "module.h" #include "term.h" @@ -698,7 +699,12 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) if (!beam) { #if JIT_ARCH_TARGET == JIT_ARCH_X86_64 if (chdir("x86_64") != 0) { - perror("Error: "); + perror("Error: cannot find x86_64 directory"); + return EXIT_FAILURE; + } +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 + if (chdir("aarch64") != 0) { + perror("Error: cannot find aarch64 directory"); return EXIT_FAILURE; } #else From 7a8c60ac21f54245a4010604d85bff04c8889cd7 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 19 Aug 2025 22:37:27 +0200 Subject: [PATCH 27/46] AArch64: fix mmap usage on macOS Signed-off-by: Paul Guyot --- src/platforms/generic_unix/CMakeLists.txt | 19 +++++++------- .../generic_unix/lib/jit_stream_mmap.c | 25 ++++++++++--------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/platforms/generic_unix/CMakeLists.txt b/src/platforms/generic_unix/CMakeLists.txt index 668cb3db1..933971dd3 100644 --- a/src/platforms/generic_unix/CMakeLists.txt +++ b/src/platforms/generic_unix/CMakeLists.txt @@ -39,11 +39,6 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") endif() endif() -if(NOT AVM_DISABLE_JIT) -include(DefineIfExists) 
-define_if_function_exists(AtomVM pthread_jit_write_protect_np "pthread.h" PUBLIC HAVE_PTHREAD_JIT_WRITE_PROTECT_NP) -define_if_symbol_exists(AtomVM MAP_JIT "sys/mman.h" PUBLIC HAVE_MAP_JIT) -endif() add_subdirectory(lib) target_include_directories(AtomVM PUBLIC lib/) @@ -57,17 +52,21 @@ set( ) target_link_libraries(AtomVM PRIVATE libAtomVM${PLATFORM_LIB_SUFFIX}) +if(AVM_DISABLE_JIT) +set(precompiled_suffix) +else() +set(precompiled_suffix "-${AVM_JIT_TARGET_ARCH}") +include(DefineIfExists) +define_if_function_exists(libAtomVM${PLATFORM_LIB_SUFFIX} pthread_jit_write_protect_np "pthread.h" PUBLIC HAVE_PTHREAD_JIT_WRITE_PROTECT_NP) +define_if_symbol_exists(libAtomVM${PLATFORM_LIB_SUFFIX} MAP_JIT "sys/mman.h" PUBLIC HAVE_MAP_JIT) +endif() + if (COVERAGE) include(CodeCoverage) append_coverage_compiler_flags_to_target(AtomVM) append_coverage_linker_flags_to_target(AtomVM) endif() -if(AVM_DISABLE_JIT) -set(precompiled_suffix) -else() -set(precompiled_suffix "-${AVM_JIT_TARGET_ARCH}") -endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/atomvm ${CMAKE_CURRENT_BINARY_DIR}/atomvm @ONLY) install(TARGETS AtomVM DESTINATION lib/atomvm) diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c index bed7819c2..6ad4262ee 100644 --- a/src/platforms/generic_unix/lib/jit_stream_mmap.c +++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c @@ -74,6 +74,7 @@ static term nif_jit_stream_mmap_new(Context *ctx, int argc, term argv[]) uint8_t *addr = (uint8_t *) mmap(0, size, prot, flags, fd, offset); if (addr == MAP_FAILED) { + fprintf(stderr, "Could not allocate mmap for JIT"); RAISE_ERROR(BADARG_ATOM); } @@ -87,15 +88,6 @@ static term nif_jit_stream_mmap_new(Context *ctx, int argc, term argv[]) js->stream_offset = 0; js->stream_size = size; -#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP - pthread_jit_write_protect_np(0); -#endif -#if defined(__APPLE__) - sys_icache_invalidate(addr, size); -#elif defined(__GNUC__) - 
__builtin___clear_cache(addr, addr + size); -#endif - term obj = enif_make_resource(erl_nif_env_from_context(ctx), js); enif_release_resource(js); // decrement refcount after enif_alloc_resource return obj; @@ -129,7 +121,13 @@ static term nif_jit_stream_mmap_append(Context *ctx, int argc, term argv[]) const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[1]); assert(js_obj->stream_offset + binary_size < js_obj->stream_size); +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(0); +#endif memcpy(js_obj->stream_base + js_obj->stream_offset, binary_data, binary_size); +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(1); +#endif js_obj->stream_offset += binary_size; return argv[0]; @@ -155,7 +153,13 @@ static term nif_jit_stream_mmap_replace(Context *ctx, int argc, term argv[]) RAISE_ERROR(BADARG_ATOM); } +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(0); +#endif memcpy(js_obj->stream_base + offset, binary_data, binary_size); +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(1); +#endif return argv[0]; } @@ -232,9 +236,6 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) return NULL; } -#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP - pthread_jit_write_protect_np(1); -#endif #if defined(__APPLE__) sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size); #elif defined(__GNUC__) From ef450536e84f84093fd0224fab9282287d072a96 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 22 Aug 2025 20:54:17 +0200 Subject: [PATCH 28/46] AArch64: Fix bug in remaining reductions handling Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 8 +++--- libs/jit/src/jit_aarch64_asm.erl | 23 ++++++++++++++--- tests/libs/jit/jit_aarch64_asm_tests.erl | 26 +++++++++++++++++-- tests/libs/jit/jit_aarch64_tests.erl | 32 +++++++++++++----------- 4 files changed, 64 insertions(+), 25 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl 
b/libs/jit/src/jit_aarch64.erl index b6e565b7f..dbc1df852 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1836,11 +1836,11 @@ decrement_reductions_and_maybe_schedule_next( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 ) -> % Load reduction count - I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT), + I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), % Decrement reduction count I2 = jit_aarch64_asm:subs(Temp, Temp, 1), % Store back the decremented value - I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT), + I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), % Branch if reduction count is not zero @@ -1879,11 +1879,11 @@ call_only_or_schedule_next( Label ) -> % Load reduction count - I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT), + I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), % Decrement reduction count I2 = jit_aarch64_asm:subs(Temp, Temp, 1), % Store back the decremented value - I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT), + I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), % Branch to label if reduction count is not zero diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 548128318..ac78e1db9 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -46,6 +46,7 @@ ret/0, nop/0, str/2, + str_w/2, str/3, tst/2, tst_w/2, @@ -225,13 +226,11 @@ ldr_w(Dst, {BaseReg, Offset}) when is_atom(BaseReg), is_integer(Offset), Offset >= 0, - Offset =< 32760, - (Offset rem 8) =:= 0 + Offset =< 16380, + (Offset rem 4) =:= 0 -> DstNum = reg_to_num(Dst), BaseRegNum = reg_to_num(BaseReg), - %% AArch64 LDR (immediate) encoding for 64-bit: 11111001010iiiiiiiiiiibbbbbttttt - %% 0xf9400000 | 
(Offset div 8) << 10 | BaseReg << 5 | Dst << (16#B9400000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little >>. @@ -588,6 +587,22 @@ str(Reg, {Base}, Imm) when BaseNum = reg_to_num(Base), <<(16#F8000400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>. +%% Emit a store register (STR) instruction for 32-bit store to memory +-spec str_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary(). +str_w(Src, {BaseReg, Offset}) when + is_atom(Src), + is_atom(BaseReg), + is_integer(Offset), + Offset >= 0, + Offset =< 16380, + (Offset rem 4) =:= 0 +-> + SrcNum = reg_to_num(Src), + BaseRegNum = reg_to_num(BaseReg), + << + (16#B9000000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor SrcNum):32/little + >>. + %% Emit a load register (LDR) instruction for 64-bit store to memory, with store-update (writeback) -spec ldr (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary(); diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 21a82ffa1..ec547901f 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -139,8 +139,30 @@ ldr_test_() -> ldr_w_test_() -> [ - ?_assertEqual(<<16#b9400821:32/little>>, jit_aarch64_asm:ldr_w(r1, {r1, 8})), - ?_assertEqual(<<16#b9406042:32/little>>, jit_aarch64_asm:ldr_w(r2, {r2, 96})) + ?_assertEqual( + asm(<<16#b9400821:32/little>>, "ldr w1, [x1, 8]"), jit_aarch64_asm:ldr_w(r1, {r1, 8}) + ), + ?_assertEqual( + asm(<<16#b9406042:32/little>>, "ldr w2, [x2, 96]"), jit_aarch64_asm:ldr_w(r2, {r2, 96}) + ), + ?_assertEqual( + asm(<<16#b97ffc60:32/little>>, "ldr w0, [x3, 16380]"), + jit_aarch64_asm:ldr_w(r0, {r3, 16380}) + ) + ]. 
+ +str_w_test_() -> + [ + ?_assertEqual( + asm(<<16#b9000821:32/little>>, "str w1, [x1, 8]"), jit_aarch64_asm:str_w(r1, {r1, 8}) + ), + ?_assertEqual( + asm(<<16#b9006042:32/little>>, "str w2, [x2, 96]"), jit_aarch64_asm:str_w(r2, {r2, 96}) + ), + ?_assertEqual( + asm(<<16#b93ffc60:32/little>>, "str w0, [x3, 16380]"), + jit_aarch64_asm:str_w(r0, {r3, 16380}) + ) ]. ldr_d_test_() -> diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 977884c96..deb00c7b2 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -41,7 +41,7 @@ call_primitive_0_test() -> Stream = ?BACKEND:stream(State1), Dump = << - "0: f9400050 ldr x16, [x2]\n" + " 0: f9400050 ldr x16, [x2]\n" " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" " c: d63f0200 blr x16\n" @@ -58,7 +58,7 @@ call_primitive_1_test() -> Stream = ?BACKEND:stream(State1), Dump = << - "0: f9400450 ldr x16, [x2, #8]\n" + " 0: f9400450 ldr x16, [x2, #8]\n" " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" " c: d63f0200 blr x16\n" @@ -149,20 +149,22 @@ call_primitive_extended_regs_test() -> call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), - State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, -1]), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, -1]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9400827 ldr x7, [x1, #16]\n" + " 0: b9401027 ldr w7, [x1, #16]\n" " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: f9000827 str x7, [x1, #16]\n" + " 8: b9001027 str w7, [x1, #16]\n" " c: 540000a1 b.ne 0x20 // b.any\n" " 10: 10000087 adr x7, 0x20\n" " 14: f9000427 str x7, [x1, #8]\n" " 18: f9400847 ldr x7, [x2, #16]\n" " 1c: d61f00e0 br x7\n" " 20: f9401047 ldr x7, [x2, #32]\n" - " 24: 92800002 mov x2, 
#0xffffffffffffffff // #-1\n" - " 28: d61f00e0 br x7" + " 24: d2800042 mov x2, #0x2 // #2\n" + " 28: d2800043 mov x3, #0x2 // #2\n" + " 2c: 92800004 mov x4, #0xffffffffffffffff // #-1\n" + " 30: d61f00e0 br x7" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -172,9 +174,9 @@ call_ext_last_test() -> State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, 10]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9400827 ldr x7, [x1, #16]\n" + " 0: b9401027 ldr w7, [x1, #16]\n" " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: f9000827 str x7, [x1, #16]\n" + " 8: b9001027 str w7, [x1, #16]\n" " c: 540000a1 b.ne 0x20 // b.any\n" " 10: 10000087 adr x7, 0x20\n" " 14: f9000427 str x7, [x1, #8]\n" @@ -241,9 +243,9 @@ call_only_or_schedule_next_and_label_relocation_test() -> " 0: 1400000d b 0x34\n" " 4: 14000002 b 0xc\n" " 8: 14000009 b 0x2c\n" - " c: f9400827 ldr x7, [x1, #16]\n" + " c: b9401027 ldr w7, [x1, #16]\n" " 10: f10004e7 subs x7, x7, #0x1\n" - " 14: f9000827 str x7, [x1, #16]\n" + " 14: b9001027 str w7, [x1, #16]\n" " 18: 540000a1 b.ne 0x2c // b.any\n" " 1c: 10000087 adr x7, 0x2c\n" " 20: f9000427 str x7, [x1, #8]\n" @@ -413,9 +415,9 @@ call_ext_test() -> ?BACKEND:assert_all_native_free(State2), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9400827 ldr x7, [x1, #16]\n" + " 0: b9401027 ldr w7, [x1, #16]\n" " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: f9000827 str x7, [x1, #16]\n" + " 8: b9001027 str w7, [x1, #16]\n" " c: 540000a1 b.ne 0x20 // b.any\n" " 10: 10000087 adr x7, 0x20\n" " 14: f9000427 str x7, [x1, #8]\n" @@ -465,9 +467,9 @@ call_fun_test() -> ?BACKEND:assert_all_native_free(State9), Stream = ?BACKEND:stream(State9), Dump = << - " 0: f9400827 ldr x7, [x1, #16]\n" + " 0: b9401027 ldr w7, [x1, #16]\n" " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: f9000827 str x7, [x1, #16]\n" + " 8: b9001027 str w7, [x1, #16]\n" " c: 540000a1 b.ne 0x20 // b.any\n" " 10: 10000087 adr x7, 0x20\n" " 14: f9000427 str x7, [x1, #8]\n" From 
07d900b876541c4a883498fce7c45e069eb0fe17 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 22 Aug 2025 22:16:55 +0200 Subject: [PATCH 29/46] AArch64: factorize and increase coverage Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 57 +++++++++--------------- tests/libs/jit/jit_aarch64_asm_tests.erl | 57 +++++++++++++++++++++++- 2 files changed, 77 insertions(+), 37 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index dbc1df852..cbf8f2439 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -824,24 +824,16 @@ if_block_cond( ) when ?IS_GPR(Reg) -> % AND with mask OffsetBefore = StreamModule:offset(Stream0), - Stream1 = - try - I = jit_aarch64_asm:and_(Temp, Reg, Mask), - StreamModule:append(Stream0, I) - catch - error:{unencodable_immediate, Val} -> - MoveI = jit_aarch64_asm:mov(Temp, Mask), - AndI = jit_aarch64_asm:and_(Temp, Reg, Temp), - StreamModule:append(Stream0, <>) - end, + State1 = op_imm(State0, and_, Temp, Reg, Mask), + Stream1 = State1#state.stream, % Compare with value I2 = jit_aarch64_asm:cmp(Temp, Val), Stream2 = StreamModule:append(Stream1, I2), OffsetAfter = StreamModule:offset(Stream2), I3 = jit_aarch64_asm:bcc(eq, 0), Stream3 = StreamModule:append(Stream2, I3), - State1 = State0#state{stream = Stream3}, - {State1, eq, OffsetAfter - OffsetBefore}; + State2 = State1#state{stream = Stream3}, + {State2, eq, OffsetAfter - OffsetBefore}; if_block_cond( #state{ stream_module = StreamModule, @@ -1728,47 +1720,40 @@ get_module_index( Reg }. 
-and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> +op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, Reg, Reg, Val) -> Stream1 = try - I = jit_aarch64_asm:and_(Reg, Reg, Val), + I = jit_aarch64_asm:Op(Reg, Reg, Val), StreamModule:append(Stream0, I) catch error:{unencodable_immediate, Val} -> [Temp | _] = State#state.available_regs, I1 = jit_aarch64_asm:mov(Temp, Val), - I2 = jit_aarch64_asm:and_(Reg, Reg, Temp), + I2 = jit_aarch64_asm:Op(Reg, Reg, Temp), StreamModule:append(Stream0, <>) end, - State#state{stream = Stream1}. - -or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + State#state{stream = Stream1}; +op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, RegB, Val) -> Stream1 = try - I = jit_aarch64_asm:orr(Reg, Reg, Val), + I = jit_aarch64_asm:Op(RegA, RegB, Val), StreamModule:append(Stream0, I) catch error:{unencodable_immediate, Val} -> - [Temp | _] = State#state.available_regs, - I1 = jit_aarch64_asm:mov(Temp, Val), - I2 = jit_aarch64_asm:orr(Reg, Reg, Temp), - StreamModule:append(Stream0, <>) + MoveI = jit_aarch64_asm:mov(RegA, Val), + AndI = jit_aarch64_asm:Op(RegA, RegB, RegA), + StreamModule:append(Stream0, <>) end, State#state{stream = Stream1}. -add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - Stream1 = - try - I = jit_aarch64_asm:add(Reg, Reg, Val), - StreamModule:append(Stream0, I) - catch - error:{unencodable_immediate, Val} -> - [Temp | _] = State#state.available_regs, - I1 = jit_aarch64_asm:mov(Temp, Val), - I2 = jit_aarch64_asm:add(Reg, Reg, Temp), - StreamModule:append(Stream0, <>) - end, - State#state{stream = Stream1}. +and_(State, Reg, Val) -> + op_imm(State, and_, Reg, Reg, Val). + +or_(State, Reg, Val) -> + op_imm(State, orr, Reg, Reg, Val). + +add(State, Reg, Val) -> + op_imm(State, add, Reg, Reg, Val). 
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_aarch64_asm:sub(Reg, Reg, Val), diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index ec547901f..1702c0021 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -331,7 +331,62 @@ bcc_test_() -> ?_assertEqual(<<16#54000000:32/little>>, jit_aarch64_asm:bcc(eq, 0)), ?_assertEqual(<<16#54000001:32/little>>, jit_aarch64_asm:bcc(ne, 0)), ?_assertEqual(<<16#54fffe01:32/little>>, jit_aarch64_asm:bcc(ne, -64)), - ?_assertEqual(<<16#54000400:32/little>>, jit_aarch64_asm:bcc(eq, 128)) + ?_assertEqual( + asm(<<16#54000400:32/little>>, "b.eq 128"), + jit_aarch64_asm:bcc(eq, 128) + ), + ?_assertEqual( + asm(<<16#54000402:32/little>>, "b.cs 128"), + jit_aarch64_asm:bcc(cs, 128) + ), + ?_assertEqual( + asm(<<16#54000403:32/little>>, "b.cc 128"), + jit_aarch64_asm:bcc(cc, 128) + ), + ?_assertEqual( + asm(<<16#54000404:32/little>>, "b.mi 128"), + jit_aarch64_asm:bcc(mi, 128) + ), + ?_assertEqual( + asm(<<16#54000405:32/little>>, "b.pl 128"), + jit_aarch64_asm:bcc(pl, 128) + ), + ?_assertEqual( + asm(<<16#54000406:32/little>>, "b.vs 128"), + jit_aarch64_asm:bcc(vs, 128) + ), + ?_assertEqual( + asm(<<16#54000408:32/little>>, "b.hi 128"), + jit_aarch64_asm:bcc(hi, 128) + ), + ?_assertEqual( + asm(<<16#54000409:32/little>>, "b.ls 128"), + jit_aarch64_asm:bcc(ls, 128) + ), + ?_assertEqual( + asm(<<16#5400040a:32/little>>, "b.ge 128"), + jit_aarch64_asm:bcc(ge, 128) + ), + ?_assertEqual( + asm(<<16#5400040b:32/little>>, "b.lt 128"), + jit_aarch64_asm:bcc(lt, 128) + ), + ?_assertEqual( + asm(<<16#5400040c:32/little>>, "b.gt 128"), + jit_aarch64_asm:bcc(gt, 128) + ), + ?_assertEqual( + asm(<<16#5400040d:32/little>>, "b.le 128"), + jit_aarch64_asm:bcc(le, 128) + ), + ?_assertEqual( + asm(<<16#5400040e:32/little>>, "b.al 128"), + jit_aarch64_asm:bcc(al, 128) + ), + ?_assertEqual( + 
asm(<<16#5400040f:32/little>>, "b.nv 128"), + jit_aarch64_asm:bcc(nv, 128) + ) ]. stp_test_() -> From 67a5898045f20656f4040028a856e9294c2a5176 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 23 Aug 2025 12:18:47 +0200 Subject: [PATCH 30/46] AArch64: increase coverage and use cbz/tbz/tbnz Also simplify {RegOrTuple, '&', Val, '!=', 0} when possible Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 221 ++++--- libs/jit/src/jit_aarch64_asm.erl | 51 +- tests/libs/jit/jit_aarch64_asm_tests.erl | 58 +- tests/libs/jit/jit_aarch64_tests.erl | 705 ++++++++++++++++++++++- 4 files changed, 895 insertions(+), 140 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index cbf8f2439..54ec8ebf9 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -158,12 +158,15 @@ {free, aarch64_register()} | aarch64_register(). -type condition() :: - {aarch64_register(), '<', 0} - | {maybe_free_aarch64_register(), '==', 0} - | {maybe_free_aarch64_register(), '!=', integer()} + {aarch64_register(), '<', integer()} + | {maybe_free_aarch64_register(), '<', aarch64_register()} + | {maybe_free_aarch64_register(), '==', integer()} + | {maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} + | {'(int)', maybe_free_aarch64_register(), '==', integer()} + | {'(int)', maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} | {'(bool)', maybe_free_aarch64_register(), '==', false} | {'(bool)', maybe_free_aarch64_register(), '!=', false} - | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', 0}. + | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()}. % ctx->e is 0x28 % ctx->x is 0x30 @@ -518,6 +521,21 @@ jump_to_label( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1, branches = [Reloc | AccBranches]}. +%% @private +-spec rewrite_branch_instruction( + jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer() +) -> binary(). 
+rewrite_branch_instruction({cbnz, Reg}, Offset) -> + jit_aarch64_asm:cbnz(Reg, Offset); +rewrite_branch_instruction({cbnz_w, Reg}, Offset) -> + jit_aarch64_asm:cbnz_w(Reg, Offset); +rewrite_branch_instruction({tbz, Reg, Bit}, Offset) -> + jit_aarch64_asm:tbz(Reg, Bit, Offset); +rewrite_branch_instruction({tbnz, Reg, Bit}, Offset) -> + jit_aarch64_asm:tbnz(Reg, Bit, Offset); +rewrite_branch_instruction(CC, Offset) when is_atom(CC) -> + jit_aarch64_asm:bcc(CC, Offset). + %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally %% execute a block. @@ -567,7 +585,7 @@ if_block( OffsetAfter = StreamModule:offset(Stream2), %% Patch the conditional branch instruction to jump to the end of the block BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), - NewBranchInstr = jit_aarch64_asm:bcc(CC, BranchOffset), + NewBranchInstr = rewrite_branch_instruction(CC, BranchOffset), Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). @@ -601,7 +619,7 @@ if_else_block( OffsetAfter = StreamModule:offset(Stream3), %% Patch the conditional branch to jump to the else block ElseBranchOffset = OffsetAfter - (Offset + BranchInstrOffset), - NewBranchInstr = jit_aarch64_asm:bcc(CC, ElseBranchOffset), + NewBranchInstr = rewrite_branch_instruction(CC, ElseBranchOffset), Stream4 = StreamModule:replace(Stream3, Offset + BranchInstrOffset, NewBranchInstr), %% Build the else block StateElse = State2#state{ @@ -619,23 +637,41 @@ if_else_block( Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). --spec if_block_cond(state(), condition()) -> {state(), jit_aarch64_asm:cc(), non_neg_integer()}. 
+-spec if_block_cond(state(), condition()) -> + { + state(), + jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, + non_neg_integer() + }. if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> - I1 = jit_aarch64_asm:tst(Reg, Reg), - % pl = positive or zero (>=0) - I2 = jit_aarch64_asm:bcc(pl, 0), + I = jit_aarch64_asm:tbz(Reg, 63, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = State0#state{stream = Stream1}, + {State1, {tbz, Reg, 63}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {Reg, '<', Val} +) when is_atom(Reg), is_integer(Val) -> + I1 = jit_aarch64_asm:cmp(Reg, Val), + % ge = greater than or equal + I2 = jit_aarch64_asm:bcc(ge, 0), Code = << I1/binary, I2/binary >>, Stream1 = StreamModule:append(Stream0, Code), State1 = State0#state{stream = Stream1}, - {State1, pl, byte_size(I1)}; + {State1, ge, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {RegA, '<', RegB} -) when ?IS_GPR(RegA) -> - I1 = jit_aarch64_asm:cmp(RegA, RegB), + {RegOrTuple, '<', RegB} +) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp(Reg, RegB), % ge = greater than or equal I2 = jit_aarch64_asm:bcc(ge, 0), Code = << @@ -653,17 +689,11 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I1 = jit_aarch64_asm:tst(Reg, Reg), - % ne = not equal - I2 = jit_aarch64_asm:bcc(ne, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + I = jit_aarch64_asm:cbnz(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ne, byte_size(I1)}; + {State2, {cbnz, Reg}, 0}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', 0} ) -> @@ -672,7 +702,21 @@ 
if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I1 = jit_aarch64_asm:tst_w(Reg, Reg), + I = jit_aarch64_asm:cbnz_w(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {cbnz_w, Reg}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp_w(Reg, Val), I2 = jit_aarch64_asm:bcc(ne, 0), Code = << I1/binary, @@ -691,11 +735,7 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I1 = - case Val of - V when is_integer(V) -> jit_aarch64_asm:cmp(Reg, V); - V when is_atom(V) -> jit_aarch64_asm:cmp(Reg, V) - end, + I1 = jit_aarch64_asm:cmp(Reg, Val), I2 = jit_aarch64_asm:bcc(eq, 0), Code = << I1/binary, @@ -708,17 +748,13 @@ if_block_cond( if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '!=', Val} -) when is_integer(Val) orelse ?IS_GPR(Val) -> +) when is_integer(Val) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I1 = - case Val of - V when is_integer(V) -> jit_aarch64_asm:cmp32(Reg, V); - V when is_atom(V) -> jit_aarch64_asm:cmp32(Reg, V) - end, + I1 = jit_aarch64_asm:cmp_w(Reg, Val), I2 = jit_aarch64_asm:bcc(eq, 0), Code = << I1/binary, @@ -731,17 +767,13 @@ if_block_cond( if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', Val} -) when is_integer(Val) orelse ?IS_GPR(Val) -> +) when is_integer(Val) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I1 = - case Val of - V when is_integer(V) -> jit_aarch64_asm:cmp(Reg, V); - V when is_atom(V) -> jit_aarch64_asm:cmp(Reg, V) - end, + I1 = jit_aarch64_asm:cmp(Reg, Val), I2 = jit_aarch64_asm:bcc(ne, 0), Code = << 
I1/binary, @@ -753,67 +785,66 @@ if_block_cond( {State2, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {'(int)', RegOrTuple, '==', Val} -) when is_integer(Val) orelse ?IS_GPR(Val) -> + {'(bool)', RegOrTuple, '==', false} +) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I1 = - case Val of - V when is_integer(V) -> jit_aarch64_asm:cmp32(Reg, V); - V when is_atom(V) -> jit_aarch64_asm:cmp32(Reg, V) - end, - I2 = jit_aarch64_asm:bcc(ne, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + % Test lowest bit + I = jit_aarch64_asm:tbnz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ne, byte_size(I1)}; + {State2, {tbnz, Reg, 0}, 0}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {'(bool)', RegOrTuple, '==', false} + {'(bool)', RegOrTuple, '!=', false} ) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - % Test low 8 bits - I1 = jit_aarch64_asm:tst_w(Reg, 16#FF), - I2 = jit_aarch64_asm:bcc(ne, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + % Test lowest bit + I = jit_aarch64_asm:tbz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ne, byte_size(I1)}; + {State2, {tbz, Reg, 0}, 0}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0} = State0, - {'(bool)', RegOrTuple, '!=', false} + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {RegOrTuple, '&', Val, '!=', 0} ) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - % Test low 8 bits - I1 = jit_aarch64_asm:tst_w(Reg, 16#FF), + % Test bits + TestCode = + try + 
jit_aarch64_asm:tst(Reg, Val) + catch + error:{unencodable_immediate, Val} -> + TestCode0 = jit_aarch64_asm:mov(Temp, Val), + TestCode1 = jit_aarch64_asm:tst(Reg, Temp), + <> + end, I2 = jit_aarch64_asm:bcc(eq, 0), Code = << - I1/binary, + TestCode/binary, I2/binary >>, Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(I1)}; + {State2, eq, byte_size(TestCode)}; if_block_cond( #state{ stream_module = StreamModule, @@ -853,53 +884,7 @@ if_block_cond( Stream3 = StreamModule:append(Stream2, I3), State3 = State1#state{stream = Stream3}, State4 = if_block_free_reg(RegTuple, State3), - {State4, eq, OffsetAfter - OffsetBefore}; -if_block_cond( - #state{ - stream_module = StreamModule, - stream = Stream0 - } = State0, - {RegOrTuple, '&', Val} -) -> - Reg = - case RegOrTuple of - {free, Reg0} -> Reg0; - RegOrTuple -> RegOrTuple - end, - % Test bits - I1 = jit_aarch64_asm:tst(Reg, Val), - I2 = jit_aarch64_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = if_block_free_reg(RegOrTuple, State0), - State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(I1)}; -if_block_cond( - #state{ - stream_module = StreamModule, - stream = Stream0 - } = State0, - {'(bool)', RegOrTuple, '&', Val} -) -> - Reg = - case RegOrTuple of - {free, Reg0} -> Reg0; - RegOrTuple -> RegOrTuple - end, - % Test 8-bit value - I1 = jit_aarch64_asm:tst_w(Reg, Val), - I2 = jit_aarch64_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = if_block_free_reg(RegOrTuple, State0), - State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(I1)}. + {State4, eq, OffsetAfter - OffsetBefore}. -spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state(). 
if_block_free_reg({free, Reg}, State0) -> diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index ac78e1db9..85ef11198 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -30,8 +30,12 @@ blr/1, br/1, brk/1, + cbnz/2, + cbnz_w/2, + tbz/3, + tbnz/3, cmp/2, - cmp32/2, + cmp_w/2, and_/3, ldr/2, ldr_w/2, @@ -779,6 +783,39 @@ bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> Offset19 = Offset div 4, <<(16#54000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor CondNum):32/little>>. +%% Emit a compare and branch on non-zero +-spec cbnz(aarch64_gpr_register(), integer()) -> binary(). +cbnz(Rt, Offset) when is_integer(Offset) -> + RtNum = reg_to_num(Rt), + Offset19 = Offset div 4, + <<(16#B5000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor RtNum):32/little>>. + +-spec cbnz_w(aarch64_gpr_register(), integer()) -> binary(). +cbnz_w(Rt, Offset) when is_integer(Offset) -> + RtNum = reg_to_num(Rt), + Offset19 = Offset div 4, + <<(16#35000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor RtNum):32/little>>. + +%% Emit a test bit and branch if zero +-spec tbz(aarch64_gpr_register(), 0..63, integer()) -> binary(). +tbz(Rt, Bit, Offset) when Offset >= -32768 andalso Offset < 32768 -> + RtNum = reg_to_num(Rt), + Offset14 = Offset div 4, + << + ((Bit band 32 bsl 26) bor 16#36000000 bor (Bit band 31 bsl 19) bor + ((Offset14 band 16#3FFF) bsl 5) bor RtNum):32/little + >>. + +%% Emit a test bit and branch if not zero +-spec tbnz(aarch64_gpr_register(), 0..63, integer()) -> binary(). +tbnz(Rt, Bit, Offset) when Offset >= -32768 andalso Offset < 32768 -> + RtNum = reg_to_num(Rt), + Offset14 = Offset div 4, + << + ((Bit band 32 bsl 26) bor 16#37000000 bor (Bit band 31 bsl 19) bor + ((Offset14 band 16#3FFF) bsl 5) bor RtNum):32/little + >>. + %% Emit a compare instruction -spec cmp(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). 
cmp(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> @@ -801,19 +838,13 @@ cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> <>. %% Emit a 32-bit compare instruction --spec cmp32(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). -cmp32(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> - RnNum = reg_to_num(Rn), - RmNum = reg_to_num(Rm), - %% AArch64 CMP (32-bit shifted register) encoding: CMP Wn, Wm - %% This is SUBS WZR, Wn, Wm: 01101011000mmmmm000000nnnnn11111 - <<(16#6B00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>; -cmp32(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> +-spec cmp_w(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary(). +cmp_w(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 -> RnNum = reg_to_num(Rn), %% AArch64 CMP (32-bit immediate) encoding: CMP Wn, #imm %% This is SUBS WZR, Wn, #imm: 0111000100iiiiiiiiiiiinnnnn11111 <<(16#7100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>; -cmp32(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm < 0, Imm >= -4095 -> +cmp_w(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm < 0, Imm >= -4095 -> RnNum = reg_to_num(Rn), %% For negative immediates, use ADD form: CMP Wn, #(-imm) becomes ADDS WZR, Wn, #(-imm) %% AArch64 ADDS (32-bit immediate) encoding: 0011000100iiiiiiiiiiiinnnnn11111 diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 1702c0021..08fd4abca 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -250,13 +250,11 @@ cmp_test_() -> ?_assertEqual(<<16#F103001F:32/little>>, jit_aarch64_asm:cmp(r0, 192)) ]. 
-cmp32_test_() -> +cmp_w_test_() -> [ - % cmp32 reg, reg - ?_assertEqual(<<16#6B01001F:32/little>>, jit_aarch64_asm:cmp32(r0, r1)), - % cmp32 reg, imm - ?_assertEqual(<<16#7100001F:32/little>>, jit_aarch64_asm:cmp32(r0, 0)), - ?_assertEqual(<<16#7103001F:32/little>>, jit_aarch64_asm:cmp32(r0, 192)) + % cmp_w reg, imm + ?_assertEqual(<<16#7100001F:32/little>>, jit_aarch64_asm:cmp_w(r0, 0)), + ?_assertEqual(<<16#7103001F:32/little>>, jit_aarch64_asm:cmp_w(r0, 192)) ]. and_test_() -> @@ -389,6 +387,54 @@ bcc_test_() -> ) ]. +cbnz_test_() -> + [ + ?_assertEqual( + asm(<<16#b5000401:32/little>>, "cbnz x1, 128"), + jit_aarch64_asm:cbnz(r1, 128) + ), + ?_assertEqual( + asm(<<16#35000402:32/little>>, "cbnz w2, 128"), + jit_aarch64_asm:cbnz_w(r2, 128) + ), + ?_assertEqual( + asm(<<16#b5fffc03:32/little>>, "cbnz x3, -128"), + jit_aarch64_asm:cbnz(r3, -128) + ) + ]. + +tbz_test_() -> + [ + ?_assertEqual( + asm(<<16#b6f80400:32/little>>, "tbz x0, #63, 128"), + jit_aarch64_asm:tbz(r0, 63, 128) + ), + ?_assertEqual( + asm(<<16#36180400:32/little>>, "tbz x0, #3, 128"), + jit_aarch64_asm:tbz(r0, 3, 128) + ), + ?_assertEqual( + asm(<<16#363ffc03:32/little>>, "tbz x3, #7, -128"), + jit_aarch64_asm:tbz(r3, 7, -128) + ) + ]. + +tbnz_test_() -> + [ + ?_assertEqual( + asm(<<16#37000400:32/little>>, "tbnz x0, #0, 128"), + jit_aarch64_asm:tbnz(r0, 0, 128) + ), + ?_assertEqual( + asm(<<16#37180400:32/little>>, "tbnz x0, #3, 128"), + jit_aarch64_asm:tbnz(r0, 3, 128) + ), + ?_assertEqual( + asm(<<16#373ffc03:32/little>>, "tbnz x3, #7, -128"), + jit_aarch64_asm:tbnz(r3, 7, -128) + ) + ]. + stp_test_() -> [ ?_assertEqual( diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index deb00c7b2..5568c7fed 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -202,6 +202,69 @@ call_primitive_last_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+return_if_not_equal_to_ctx_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: eb0000ff cmp x7, x0\n" + " 20: 54000060 b.eq 0x2c // b.none\n" + " 24: aa0703e0 mov x0, x7\n" + " 28: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg), + ?assertEqual(r8, OtherReg), + State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: aa0703e8 mov x8, x7\n" + " 20: eb00011f cmp x8, x0\n" + " 24: 54000060 b.eq 0x30 // b.none\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. 
+ move_to_cp_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}), @@ -226,6 +289,501 @@ increment_sp_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +if_block_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State2, RegA, RegB} + end, + fun({State0, RegA, RegB}) -> + [ + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: b6f80047 tbz x7, #63, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', RegB}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: eb0800ff cmp x7, x8\n" + " c: 5400004a b.ge 0x14 // b.tcont\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: b5000047 cbnz x7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, 
RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: b5000047 cbnz x7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 35000047 cbnz w7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 35000047 cbnz w7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, 
#56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000041 b.ne 0x14 // b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000041 b.ne 0x14 
// b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000041 b.ne 0x14 // b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000041 b.ne 0x14 // b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 37000047 tbnz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 37000047 tbnz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], 
?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 36000047 tbz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 36000047 tbz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f24008ff tst x7, #0x7\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#5, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: d28000a9 mov x9, #0x5 // #5\n" + " c: ea0900ff tst x7, x9\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + 
{{free, RegA}, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f24008ff tst x7, #0x7\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 92400ce9 and x9, x7, #0xf\n" + " c: f1003d3f cmp x9, #0xf\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 92400ce7 and x7, x7, #0xf\n" + " c: f1003cff cmp x7, #0xf\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end) + ] + end}. 
+ +if_else_block_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + State3 = ?BACKEND:if_else_block( + State2, + {Reg1, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 2) + end, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 4) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000061 b.ne 0x18 // b.any\n" + " 10: 91000908 add x8, x8, #0x2\n" + " 14: 14000002 b 0x1c\n" + " 18: 91001108 add x8, x8, #0x4" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +shift_right_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:shift_right(State1, Reg, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d343fce7 lsr x7, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +shift_left_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:shift_left(State1, Reg, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d37df0e7 lsl x7, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ call_only_or_schedule_next_and_label_relocation_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_table(State0, 2), @@ -306,12 +864,11 @@ call_bif_with_large_literal_integer_test() -> " 74: aa0003e7 mov x7, x0\n" " 78: a8c10be1 ldp x1, x2, [sp], #16\n" " 7c: a8c103fe ldp x30, x0, [sp], #16\n" - " 80: ea0700ff tst x7, x7\n" - " 84: 54000081 b.ne 0x94 // b.any\n" - " 88: f9401847 ldr x7, [x2, #48]\n" - " 8c: d2801182 mov x2, #0x8c // #140\n" - " 90: d61f00e0 br x7\n" - " 94: f9001807 str x7, [x0, #48]" + " 80: b5000087 cbnz x7, 0x90\n" + " 84: f9401847 ldr x7, [x2, #48]\n" + " 88: d2801102 mov x2, #0x88 // #136\n" + " 8c: d61f00e0 br x7\n" + " 90: f9001807 str x7, [x0, #48]" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -383,6 +940,61 @@ is_integer_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +cond_jump_to_label(Cond, Label, MMod, MSt0) -> + MMod:if_block(MSt0, Cond, fun(BSt0) -> + MMod:jump_to_label(BSt0, Label) + end). 
+ +is_number_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + BSt1 = cond_jump_to_label( + {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 + ), + BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), + cond_jump_to_label( + {'and', [ + {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT} + ]}, + Label, + ?BACKEND, + BSt3 + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + Labels = [{Label, Offset + 16#100}], + State4 = ?BACKEND:update_branches(State3, Labels), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 540001c0 b.eq 0x44 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 1400004a b 0x144\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e8 and x8, x7, #0x3f\n" + " 2c: f100211f cmp x8, #0x8\n" + " 30: 540000a0 b.eq 0x44 // b.none\n" + " 34: 924014e7 and x7, x7, #0x3f\n" + " 38: f10060ff cmp x7, #0x18\n" + " 3c: 54000040 b.eq 0x44 // b.none\n" + " 40: 14000041 b 0x144" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), Label = 1, @@ -520,6 +1132,33 @@ move_to_vm_register_test_() -> end, fun(State0) -> [ + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, 0}, << + " 0: f900181f str xzr, [x0, #48]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, extra}, << + " 0: f900581f str xzr, [x0, #176]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {ptr, r10}, << + " 0: f900015f str xzr, [x10]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 2}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f90008ff str xzr, [x7, #16]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 20}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f90050ff str xzr, [x7, #160]" + >>) + end), %% Test: Immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << @@ -527,6 +1166,26 @@ move_to_vm_register_test_() -> " 4: f9001807 str x7, [x0, #48]" >>) end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, extra}, << + " 0: d2800547 mov x7, #0x2a // #42\n" + " 4: f9005807 str x7, [x0, #176]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 2}, << + " 0: d2800547 mov x7, #0x2a // #42\n" + " 4: f9401408 ldr x8, [x0, #40]\n" + " 8: f9000907 str x7, [x8, #16]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 20}, << + " 0: d2800547 mov x7, #0x2a // #42\n" + " 4: f9401408 ldr x8, [x0, #40]\n" + " 8: f9005107 str x7, [x8, #160]" + >>) + end), %% Test: Immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 99, {ptr, r10}, << @@ -585,6 +1244,11 @@ move_to_vm_register_test_() -> " 0: f900180a str x10, [x0, #48]" >>) end), + ?_test(begin + move_to_vm_register_test0(State0, r10, {x_reg, extra}, << + " 0: f900580a str x10, [x0, #176]" + >>) + end), %% Test: Native register to ptr ?_test(begin 
move_to_vm_register_test0(State0, r9, {ptr, r10}, << @@ -608,6 +1272,35 @@ move_to_vm_register_test_() -> " 10: f9001807 str x7, [x0, #48]" >>) end), + ?_test(begin + move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, extra}, << + " 0: d29bde07 mov x7, #0xdef0 // #57072\n" + " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" + " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" + " c: f2e24687 movk x7, #0x1234, lsl #48\n" + " 10: f9005807 str x7, [x0, #176]\n" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 2}, << + " 0: d29bde07 mov x7, #0xdef0 // #57072\n" + " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" + " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" + " c: f2e24687 movk x7, #0x1234, lsl #48\n" + " 10: f9401408 ldr x8, [x0, #40]\n" + " 14: f9000907 str x7, [x8, #16]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 20}, << + " 0: d29bde07 mov x7, #0xdef0 // #57072\n" + " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" + " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" + " c: f2e24687 movk x7, #0x1234, lsl #48\n" + " 10: f9401408 ldr x8, [x0, #40]\n" + " 14: f9005107 str x7, [x8, #160]" + >>) + end), %% Test: Large immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 16#123456789abcdef0, {ptr, r10}, << From dc371bcfd07742feb844ac8e6713aed1f506221d Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 20:15:52 +0200 Subject: [PATCH 31/46] AArch64: support aarch64 on Linux Signed-off-by: Paul Guyot --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0073085fa..9025b1773 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,7 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) endif() -elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64|aarch64$") if (NOT AVM_DISABLE_JIT) 
set(AVM_JIT_TARGET_ARCH "aarch64") endif() From 229d30c9a64c55cd626d7346b57f1719fa101cef Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 15:18:47 +0200 Subject: [PATCH 32/46] AArch64: static assert offsets Signed-off-by: Paul Guyot --- src/libAtomVM/jit.c | 13 +++++++++++++ src/libAtomVM/jit.h | 9 ++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 2485846bd..eaaf29c76 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -72,6 +72,19 @@ _Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_x86_64.erl"); _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_x86_64.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_x86_64.erl"); +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 +_Static_assert(offsetof(Context, e) == 0x28, "ctx->e is 0x28 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, x) == 0x30, "ctx->x is 0x30 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, cp) == 0xB8, "ctx->cp is 0xB8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, fr) == 0xC0, "ctx->fr is 0xC0 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, bs) == 0xC8, "ctx->bs is 0xC8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in jit/src/jit_aarch64.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_aarch64.erl"); +#else +#error Unknown jit target #endif 
#define PROCESS_MAYBE_TRAP_RETURN_VALUE(return_value, offset) \ diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index ba0618c5f..646c4e2b4 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -158,16 +158,23 @@ enum TrapAndLoadResult #define JIT_VARIANT_PIC 1 +#ifndef AVM_NO_JIT + #ifdef __x86_64__ #define JIT_ARCH_TARGET JIT_ARCH_X86_64 #define JIT_JUMPTABLE_ENTRY_SIZE 5 #endif -#ifdef __arm64__ +#if defined(__arm64__) || defined(__aarch64__) #define JIT_ARCH_TARGET JIT_ARCH_AARCH64 #define JIT_JUMPTABLE_ENTRY_SIZE 4 #endif +#ifndef JIT_ARCH_TARGET +#error Unknown JIT target +#endif +#endif + /** * @brief Return the entry point from a given jit stream * From b202a3ba6ee029524ddd33cda576ee7f6e96d881 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 22:37:53 +0200 Subject: [PATCH 33/46] AArch64: use SCRATCH_REG macro and fix the list Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 54ec8ebf9..c2a7a2260 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -198,6 +198,7 @@ -define(AVAILABLE_FPREGS, [v0, v1, v2, v3, v4, v5, v6, v7]). -define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). -define(PARAMETER_FPREGS, [v0, v1, v2, v3, v4, v5]). +-define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. 
@@ -1061,7 +1062,7 @@ set_args( ParamRegs = parameter_regs(Args), ArgsRegs = args_regs(Args), AvailableScratchGP = - [rdi, rsi, rdx, rcx, r8, r9, r10, r11] -- ParamRegs -- ArgsRegs -- UsedRegs, + ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, AvailableScratchFP = ?AVAILABLE_FPREGS -- ParamRegs -- ArgsRegs -- UsedRegs, Offset = StreamModule:offset(Stream0), Args1 = [ From 5f4829c8dc9323557e3d9f0c02ad1676d4234a27 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:24:06 +0200 Subject: [PATCH 34/46] AArch64: fix register usage with if_block_cond Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index c2a7a2260..320d4fe2c 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -680,8 +680,9 @@ if_block_cond( I2/binary >>, Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1}, - {State1, ge, byte_size(I1)}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ge, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} ) -> From be7343fa9d2af92e94f1549ad2b4d8a17f843773 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:24:46 +0200 Subject: [PATCH 35/46] AArch64: add get_array_element with {free, Reg} Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 320d4fe2c..f618c6118 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1408,8 +1408,21 @@ move_array_element( }. 
%% @doc move reg[x] to a vm or native register --spec get_array_element(state(), aarch64_register(), non_neg_integer()) -> +-spec get_array_element( + state(), aarch64_register() | {free, aarch64_register()}, non_neg_integer() +) -> {state(), aarch64_register()}. +get_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + {free, Reg}, + Index +) -> + I1 = jit_aarch64_asm:ldr(Reg, {Reg, Index * 8}), + Stream1 = StreamModule:append(Stream0, <>), + {State#state{stream = Stream1}, Reg}; get_array_element( #state{ stream_module = StreamModule, From 8bd724915286c09bba9a1628051bfce524a4c5c7 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:25:34 +0200 Subject: [PATCH 36/46] AArch64: add move_to_native_register/2,3 with {x_reg, extra} Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index f618c6118..f25e55eea 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1531,6 +1531,18 @@ move_to_native_register( I1 = jit_aarch64_asm:mov(Reg, Imm), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, extra} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register( #state{ stream_module = StreamModule, @@ -1588,6 +1600,12 @@ move_to_native_register( I1 = jit_aarch64_asm:ldr(RegDst, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, 
extra}, RegDst +) -> + I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst ) when From 1b75c87b6b10bd1de633b1a2070f83f52e0cba50 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 10:43:05 +0200 Subject: [PATCH 37/46] AArch64: move labels to backend states Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 59 +++++++++++++++++++++------- tests/libs/jit/jit_aarch64_tests.erl | 37 ++++++++--------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index f25e55eea..891baaa50 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -31,7 +31,7 @@ free_native_registers/2, assert_all_native_free/1, jump_table/2, - update_branches/2, + update_branches/1, call_primitive/3, call_primitive_last/3, call_primitive_with_cp/3, @@ -64,7 +64,9 @@ call_or_schedule_next/2, call_only_or_schedule_next/2, call_func_ptr/3, - return_labels_and_lines/3 + return_labels_and_lines/2, + add_label/2, + add_label/3 ]). -include_lib("jit.hrl"). @@ -144,7 +146,8 @@ branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [aarch64_register()], available_fpregs :: [aarch64_register()], - used_regs :: [aarch64_register()] + used_regs :: [aarch64_register()], + labels :: [{integer() | reference(), integer()}] }). -type state() :: #state{}. @@ -236,7 +239,8 @@ new(_Variant, StreamModule, Stream) -> offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, available_fpregs = ?AVAILABLE_FPREGS, - used_regs = [] + used_regs = [], + labels = [] }. %%----------------------------------------------------------------------------- @@ -364,19 +368,18 @@ jump_table0( %% @doc Rewrite stream to update all branches for labels. 
%% @end %% @param State current backend state -%% @param Labels list of tuples with label, offset and size of the branch in bits %% @return Updated backend state %%----------------------------------------------------------------------------- --spec update_branches(state(), [{non_neg_integer(), non_neg_integer()}]) -> state(). -update_branches(#state{branches = []} = State, _Labels) -> +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> State; update_branches( #state{ stream_module = StreamModule, stream = Stream0, - branches = [{Label, Offset, Type} | BranchesT] - } = State, - Labels + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), Rel = LabelOffset - Offset, @@ -387,7 +390,7 @@ update_branches( b -> jit_aarch64_asm:b(Rel) end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), - update_branches(State#state{stream = Stream1, branches = BranchesT}, Labels). + update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- %% @doc Emit a call (call with return) to a primitive with arguments. This @@ -1950,7 +1953,6 @@ set_bs(#state{stream_module = StreamModule, stream = Stream0} = State0, TermReg) %%----------------------------------------------------------------------------- %% @param State current state -%% @param SortedLabels labels information, sorted by offset %% @param SortedLines line information, sorted by offset %% @doc Build labels and line tables and encode a function that returns it. 
%% In this case, the function returns the effective address of what immediately @@ -1961,11 +1963,16 @@ set_bs(#state{stream_module = StreamModule, stream = Stream0} = State0, TermReg) return_labels_and_lines( #state{ stream_module = StreamModule, - stream = Stream0 + stream = Stream0, + labels = Labels } = State, - SortedLabels, SortedLines ) -> + SortedLabels = lists:keysort(2, [ + {Label, LabelOffset} + || {Label, LabelOffset} <- Labels, is_integer(Label) + ]), + I1 = jit_aarch64_asm:adr(r0, 8), I2 = jit_aarch64_asm:ret(), LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, @@ -2016,3 +2023,27 @@ args_regs(Args) -> end, Args ). + +%%----------------------------------------------------------------------------- +%% @doc Add a label at the current offset +%% @end +%% @param State current backend state +%% @param Label the label number or reference +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec add_label(state(), integer() | reference()) -> state(). +add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) -> + Offset = StreamModule:offset(Stream), + add_label(State, Label, Offset). + +%%----------------------------------------------------------------------------- +%% @doc Add a label at a specific offset +%% @end +%% @param State current backend state +%% @param Label the label number or reference +%% @param Offset the explicit offset for this label +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec add_label(state(), integer() | reference(), integer()) -> state(). +add_label(#state{labels = Labels} = State, Label, Offset) -> + State#state{labels = [{Label, Offset} | Labels]}. 
diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 5568c7fed..73b11ba83 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -787,15 +787,15 @@ shift_left_test() -> call_only_or_schedule_next_and_label_relocation_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_table(State0, 2), - Offset1 = ?BACKEND:offset(State1), - State2 = ?BACKEND:call_only_or_schedule_next(State1, 2), - Offset2 = ?BACKEND:offset(State2), - State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), % OP_INT_CALL_END - Offset0 = ?BACKEND:offset(State3), - State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), - State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), - Stream = ?BACKEND:stream(State5), + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), Dump = << " 0: 1400000d b 0x34\n" @@ -919,9 +919,9 @@ is_integer_test() -> State3 = ?BACKEND:free_native_registers(State2, [Reg]), ?BACKEND:assert_all_native_free(State3), Offset = ?BACKEND:offset(State3), - Labels = [{Label, Offset + 16#100}], - State4 = ?BACKEND:update_branches(State3, Labels), - Stream = ?BACKEND:stream(State4), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" " 4: 92400ce8 and x8, x7, #0xf\n" @@ -971,9 +971,9 @@ is_number_test() -> State3 = ?BACKEND:free_native_registers(State2, [Reg]), ?BACKEND:assert_all_native_free(State3), Offset = 
?BACKEND:offset(State3), - Labels = [{Label, Offset + 16#100}], - State4 = ?BACKEND:update_branches(State3, Labels), - Stream = ?BACKEND:stream(State4), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" " 4: 92400ce8 and x8, x7, #0xf\n" @@ -1005,11 +1005,12 @@ is_boolean_test() -> end) end), State3 = ?BACKEND:free_native_registers(State2, [Reg]), - Offset = ?BACKEND:offset(State3), - Labels = [{Label, Offset + 16#100}], ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:update_branches(State3, Labels), - Stream = ?BACKEND:stream(State4), + Offset = ?BACKEND:offset(State3), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Offset = ?BACKEND:offset(State3), Dump = << " 0: f9401807 ldr x7, [x0, #48]\n" " 4: f1012cff cmp x7, #0x4b\n" From 6d3d0aa62444c61d2e30886e40a76a7ea5ee581e Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 10:48:49 +0200 Subject: [PATCH 38/46] AArch64: optimize jump_to_label with known labels Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 891baaa50..24fffcffc 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -516,14 +516,25 @@ return_if_not_equal_to_ctx( %% @return Updated backend state %%----------------------------------------------------------------------------- jump_to_label( - #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches} = State, Label + #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches, labels = Labels} = + State, + Label ) -> Offset = StreamModule:offset(Stream0), - % Placeholder offset, will be patched - I1 = 
jit_aarch64_asm:b(0), - Reloc = {Label, Offset, b}, - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1, branches = [Reloc | AccBranches]}. + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct branch without relocation + Rel = LabelOffset - Offset, + I1 = jit_aarch64_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + I1 = jit_aarch64_asm:b(0), + Reloc = {Label, Offset, b}, + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1, branches = [Reloc | AccBranches]} + end. %% @private -spec rewrite_branch_instruction( From cbae8212989b9fb029571b288ded2d8f61e9f0df Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 11:00:27 +0200 Subject: [PATCH 39/46] AArch64: add move_to_vm_register test with fp_reg Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_tests.erl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 73b11ba83..4ab58e303 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -1334,6 +1334,13 @@ move_to_vm_register_test_() -> " 0: 92800007 mov x7, #0xffffffffffffffff // #-1\n" " 4: f9001807 str x7, [x0, #48]" >>) + end), + %% Test: ptr with offset to fp_reg (term_to_float) + ?_test(begin + move_to_vm_register_test0(State0, {free, {ptr, r9, 1}}, {fp_reg, 3}, << + " 0: f9400127 ldr x7, [x9]\n" + " 4: f9002407 str x7, [x0, #72]" + >>) end) ] end}. 
From a5327c079845be21353c3dfd22fa3b6358d01833 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 10:52:29 +0200 Subject: [PATCH 40/46] AArch64: add continuation entry point to save registers Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 24fffcffc..85210145f 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -54,6 +54,7 @@ increment_sp/2, set_continuation_to_label/2, set_continuation_to_offset/1, + continuation_entry_point/1, get_module_index/1, and_/3, or_/3, @@ -1735,6 +1736,12 @@ set_continuation_to_offset( Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. +%% @doc Implement a continuation entry point. On AArch64 this is a nop +%% as we don't need to save any register. +-spec continuation_entry_point(#state{}) -> #state{}. +continuation_entry_point(State) -> + State. 
+ get_module_index( #state{ stream_module = StreamModule, From d0b3d5bca42c372b5173ad016d493db1b0634464 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 06:53:13 +0200 Subject: [PATCH 41/46] AArch64: remove unused move_to_native_register/3 with fpu move_to_native_register/3 is never called with fp_reg Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 14 -------------- tests/libs/jit/jit_aarch64_tests.erl | 10 ---------- 2 files changed, 24 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 85210145f..31f8074e9 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1636,20 +1636,6 @@ move_to_native_register( I2 = jit_aarch64_asm:ldr(RegDst, {RegDst, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}; -move_to_native_register( - #state{ - stream_module = StreamModule, - stream = Stream0, - available_regs = [Temp | _] - } = State, - {fp_reg, F}, - RegDst -) -> - I1 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), - I2 = jit_aarch64_asm:ldr_d(RegDst, {Temp, F * 8}), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. -spec copy_to_native_register(state(), value()) -> {state(), aarch64_register()}. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 4ab58e303..6005a4952 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -1644,16 +1644,6 @@ move_to_native_register_test_() -> " 4: f9400908 ldr x8, [x8, #16]" >>, ?assertEqual(dump_to_bin(Dump), Stream) - end), - %% move_to_native_register/2: {fp_reg, N} - ?_test(begin - State1 = ?BACKEND:move_to_native_register(State0, {fp_reg, 3}, v0), - Stream = ?BACKEND:stream(State1), - Dump = << - " 0: f9406007 ldr x7, [x0, #192]\n" - " 4: fd400ce0 ldr d0, [x7, #24]" - >>, - ?assertEqual(dump_to_bin(Dump), Stream) end) ] end}. 
From ba408fde85f7204f2ac3e738a289e3a28e7a7e19 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 08:10:49 +0200 Subject: [PATCH 42/46] AArch64: remove entirely usage of FPU Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 189 ++++++++--------------- libs/jit/src/jit_aarch64_asm.erl | 29 ---- tests/libs/jit/jit_aarch64_asm_tests.erl | 10 -- tests/libs/jit/jit_aarch64_tests.erl | 31 ++-- 4 files changed, 77 insertions(+), 182 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 31f8074e9..45724531a 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -117,15 +117,7 @@ | r12 | r13 | r14 - | r15 - | v0 - | v1 - | v2 - | v3 - | v4 - | v5 - | v6 - | v7. + | r15. -define(IS_GPR(Reg), (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse @@ -133,10 +125,6 @@ Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse Reg =:= r15) ). --define(IS_FPR(Reg), - (Reg =:= v0 orelse Reg =:= v1 orelse Reg =:= v2 orelse Reg =:= v3 orelse Reg =:= v4 orelse - Reg =:= v5 orelse Reg =:= v6 orelse Reg =:= v7) -). -type stream() :: any(). @@ -146,7 +134,6 @@ offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [aarch64_register()], - available_fpregs :: [aarch64_register()], used_regs :: [aarch64_register()], labels :: [{integer() | reference(), integer()}] }). @@ -199,9 +186,7 @@ -define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). -define(AVAILABLE_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]). --define(AVAILABLE_FPREGS, [v0, v1, v2, v3, v4, v5, v6, v7]). -define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). --define(PARAMETER_FPREGS, [v0, v1, v2, v3, v4, v5]). -define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]). 
%%----------------------------------------------------------------------------- @@ -239,7 +224,6 @@ new(_Variant, StreamModule, Stream) -> branches = [], offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, - available_fpregs = ?AVAILABLE_FPREGS, used_regs = [], labels = [] }. @@ -313,13 +297,13 @@ free_native_registers(State, [Reg | Rest]) -> -spec free_native_register(state(), value()) -> state(). free_native_register( - #state{available_regs = Available0, available_fpregs = AvailableFP0, used_regs = Used0} = State, + #state{available_regs = Available0, used_regs = Used0} = State, Reg ) when is_atom(Reg) -> - {Available1, AvailableFP1, Used1} = free_reg(Available0, AvailableFP0, Used0, Reg), - State#state{available_regs = Available1, available_fpregs = AvailableFP1, used_regs = Used1}; + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; free_native_register(State, {ptr, Reg}) -> free_native_register(State, Reg); free_native_register(State, _Other) -> @@ -334,7 +318,7 @@ free_native_register(State, _Other) -> %%----------------------------------------------------------------------------- -spec assert_all_native_free(state()) -> ok. assert_all_native_free(#state{ - available_regs = ?AVAILABLE_REGS, available_fpregs = ?AVAILABLE_FPREGS, used_regs = [] + available_regs = ?AVAILABLE_REGS, used_regs = [] }) -> ok. 
@@ -482,7 +466,6 @@ return_if_not_equal_to_ctx( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - available_fpregs = AvailableFPRegs0, used_regs = UsedRegs0 } = State, {free, Reg} @@ -498,13 +481,10 @@ return_if_not_equal_to_ctx( I4 = jit_aarch64_asm:ret(), I2 = jit_aarch64_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg - ), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, Reg), State#state{ stream = Stream1, available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1 }. @@ -641,8 +621,7 @@ if_else_block( StateElse = State2#state{ stream = Stream4, used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs, - available_fpregs = State1#state.available_fpregs + available_regs = State1#state.available_regs }, State3 = BlockFalseFn(StateElse), Stream5 = State3#state.stream, @@ -905,18 +884,17 @@ if_block_cond( -spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, available_fpregs = AvFR0, used_regs = UR0} = State0, - {AvR1, AvFR1, UR1} = free_reg(AvR0, AvFR0, UR0, Reg), + #state{available_regs = AvR0, used_regs = UR0} = State0, + {AvR1, UR1} = free_reg(AvR0, UR0, Reg), State0#state{ available_regs = AvR1, - available_fpregs = AvFR1, used_regs = UR1 }; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. -spec merge_used_regs(state(), [aarch64_register()]) -> state(). 
-merge_used_regs(#state{used_regs = UR0, available_regs = AvR0, available_fpregs = AvFR0} = State, [ +merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ Reg | T ]) -> case lists:member(Reg, UR0) of @@ -924,10 +902,9 @@ merge_used_regs(#state{used_regs = UR0, available_regs = AvR0, available_fpregs merge_used_regs(State, T); false -> AvR1 = lists:delete(Reg, AvR0), - AvFR1 = lists:delete(Reg, AvFR0), UR1 = [Reg | UR0], merge_used_regs( - State#state{used_regs = UR1, available_regs = AvR1, available_fpregs = AvFR1}, T + State#state{used_regs = UR1, available_regs = AvR1}, T ) end; merge_used_regs(State, []) -> @@ -979,7 +956,6 @@ call_func_ptr( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - available_fpregs = AvailableFP0, used_regs = UsedRegs0 } = State0, FuncPtrTuple, @@ -1038,15 +1014,11 @@ call_func_ptr( AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), - AvailableFP1 = FreeRegs ++ AvailableFP0, - AvailableFP2 = lists:delete(ResultReg, AvailableFP1), - AvailableFP3 = ?AVAILABLE_FPREGS -- (?AVAILABLE_FPREGS -- AvailableFP2), UsedRegs2 = [ResultReg | UsedRegs1], { State1#state{ stream = Stream6, available_regs = AvailableRegs3, - available_fpregs = AvailableFP3, used_regs = UsedRegs2 }, ResultReg @@ -1079,7 +1051,6 @@ set_args( ArgsRegs = args_regs(Args), AvailableScratchGP = ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, - AvailableScratchFP = ?AVAILABLE_FPREGS -- ParamRegs -- ArgsRegs -- UsedRegs, Offset = StreamModule:offset(Stream0), Args1 = [ case Arg of @@ -1088,7 +1059,7 @@ set_args( end || Arg <- Args ], - SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, AvailableScratchFP, []), + SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, []), Stream1 = StreamModule:append(Stream0, SetArgsCode), NewUsedRegs = lists:foldl( fun @@ -1102,35 +1073,30 @@ set_args( State0#state{ stream 
= Stream1, available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, - available_fpregs = ?AVAILABLE_FPREGS -- ParamRegs -- NewUsedRegs, used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) }. parameter_regs(Args) -> - parameter_regs0(Args, ?PARAMETER_REGS, ?PARAMETER_FPREGS, []). + parameter_regs0(Args, ?PARAMETER_REGS, []). -parameter_regs0([], _, _, Acc) -> +parameter_regs0([], _, Acc) -> lists:reverse(Acc); -parameter_regs0([Special | T], [GPReg | GPRegsT], FPRegs, Acc) when +parameter_regs0([Special | T], [GPReg | GPRegsT], Acc) when Special =:= ctx orelse Special =:= jit_state orelse Special =:= offset -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([{free, Free} | T], GPRegs, FPRegs, Acc) -> - parameter_regs0([Free | T], GPRegs, FPRegs, Acc); -parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([Reg | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([Reg | T], GPRegs, [FPReg | FPRegsT], Acc) when ?IS_FPR(Reg) -> - parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]); -parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([{fp_reg, _} | T], GPRegs, [FPReg | FPRegsT], Acc) -> - parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]); -parameter_regs0([Int | T], [GPReg | GPRegsT], FPRegs, Acc) when is_integer(Int) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]). 
+ parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{free, Free} | T], GPRegs, Acc) -> + parameter_regs0([Free | T], GPRegs, Acc); +parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([Reg | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([Int | T], [GPReg | GPRegsT], Acc) when is_integer(Int) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]). replace_reg(Args, Reg1, Reg2) -> replace_reg0(Args, Reg1, Reg2, []). @@ -1142,49 +1108,47 @@ replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> replace_reg0([Other | T], Reg, Replacement, Acc) -> replace_reg0(T, Reg, Replacement, [Other | Acc]). -set_args0([], [], [], _AvailGP, _AvailFP, Acc) -> +set_args0([], [], [], _AvailGP, Acc) -> list_to_binary(lists:reverse(Acc)); -set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc) -> - set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); -set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); +set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc) -> + set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); set_args0( [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [?JITSTATE_REG | ParamRegs], AvailGP, - AvailFP, Acc ) -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); set_args0( - [jit_state | ArgsT], 
[?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, AvailFP, Acc + [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, Acc ) -> false = lists:member(ParamReg, ArgsRegs), - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [ + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [ jit_aarch64_asm:mov(ParamReg, ?JITSTATE_REG) | Acc ]); % ctx is special as we need it to access x_reg/y_reg/fp_reg -set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) -> +set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> false = lists:member(?CTX_REG, ArgsRegs), J = set_args1(Arg, ?CTX_REG), - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]); + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); set_args0( [Arg | ArgsT], [_ArgReg | ArgsRegs], [ParamReg | ParamRegs], [Avail | AvailGPT] = AvailGP, - AvailFP, Acc ) -> J = set_args1(Arg, ParamReg), case lists:member(ParamReg, ArgsRegs) of false -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]); + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); true -> I = jit_aarch64_asm:mov(Avail, ParamReg), NewArgsT = replace_reg(ArgsT, ParamReg, Avail), - set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, AvailFP, [J, I | Acc]) + set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, [J, I | Acc]) end. set_args1(Reg, Reg) -> @@ -1214,8 +1178,10 @@ set_args1(Arg, Reg) when is_integer(Arg) -> %% @param Dest vm register to move to %% @return Updated backend state %%----------------------------------------------------------------------------- --spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) -> - state(). +-spec move_to_vm_register + (state(), Src :: value() | vm_register(), Dest :: vm_register()) -> state(); + (state(), Src :: {free, {ptr, aarch64_register(), 1}}, Dest :: {fp_reg, non_neg_integer()}) -> + state(). 
% Native register to VM register move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) -> I1 = jit_aarch64_asm:str(Src, ?X_REG(?MAX_REG)), @@ -1268,16 +1234,19 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; +% term_to_float move_to_vm_register( - #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State, - Reg, + #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State0, + {free, {ptr, Reg, 1}}, {fp_reg, F} -) when is_atom(Reg) -> - I1 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), - I2 = jit_aarch64_asm:str(Reg, {Temp, F * 8}), - Code = <>, +) -> + I1 = jit_aarch64_asm:ldr(Reg, {Reg, 8}), + I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), + I3 = jit_aarch64_asm:str(Reg, {Temp, F * 8}), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}. + State1 = free_native_register(State0, Reg), + State1#state{stream = Stream1}. %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. 
@@ -1351,8 +1320,7 @@ move_array_element( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - used_regs = UsedRegs0, - available_fpregs = AvailableFPRegs0 + used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, @@ -1360,13 +1328,10 @@ move_array_element( ) when X < ?MAX_REG andalso is_atom(IndexReg) -> I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I2 = jit_aarch64_asm:str(IndexReg, ?X_REG(X)), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg - ), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1, stream = Stream1 }; @@ -1375,8 +1340,7 @@ move_array_element( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - used_regs = UsedRegs0, - available_fpregs = AvailableFPRegs0 + used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, @@ -1384,13 +1348,10 @@ move_array_element( ) when is_atom(IndexReg) -> I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I2 = jit_aarch64_asm:str(IndexReg, {PtrReg, 0}), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg - ), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1, stream = Stream1 }; @@ -1399,8 +1360,7 @@ move_array_element( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _] = AvailableRegs0, - used_regs = UsedRegs0, - available_fpregs = AvailableFPRegs0 + used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, @@ -1409,15 +1369,12 @@ move_array_element( I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:ldr(IndexReg, {Reg, 
IndexReg, lsl, 3}), I3 = jit_aarch64_asm:str(IndexReg, {Temp, Y * 8}), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg - ), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), Stream1 = StreamModule:append( Stream0, <> ), State#state{ available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1, stream = Stream1 }. @@ -1585,22 +1542,7 @@ move_to_native_register( I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * 8}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; -move_to_native_register( - #state{ - stream_module = StreamModule, - stream = Stream0, - available_regs = [Temp | _], - available_fpregs = [FPReg | AvailFT], - used_regs = Used - } = State, - {fp_reg, F} -) -> - I1 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), - I2 = jit_aarch64_asm:ldr_d(FPReg, {Temp, F * 8}), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_fpregs = AvailFT, used_regs = [FPReg | Used]}, FPReg}. + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}. -spec move_to_native_register(state(), value(), aarch64_register()) -> state(). move_to_native_register( @@ -1988,16 +1930,11 @@ return_labels_and_lines( ), State#state{stream = Stream1}. 
-free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> +free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), true = lists:member(Reg, UsedRegs0), UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs1, AvailableFPRegs0, UsedRegs1}; -free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_FPR(Reg) -> - AvailableFPRegs1 = free_reg0(?AVAILABLE_FPREGS, AvailableFPRegs0, Reg, []), - true = lists:member(Reg, UsedRegs0), - UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs0, AvailableFPRegs1, UsedRegs1}. + {AvailableRegs1, UsedRegs1}. free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> lists:reverse(Acc, [Reg | PrevRegs0]); diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 85ef11198..57f56819f 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -39,7 +39,6 @@ and_/3, ldr/2, ldr_w/2, - ldr_d/2, ldr/3, lsl/3, lsr/3, @@ -99,8 +98,6 @@ | sp | xzr. --type aarch64_simd_register() :: v0 | v1 | v2 | v3 | v4 | v5 | v6 | v7 | v30 | v31. - -type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al | nv. %% Emit an ADD instruction (AArch64 encoding) @@ -207,21 +204,6 @@ ldr(Xt, {Xn, Xm, lsl, Amount}) when (16#F8606800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little >>. --spec ldr_d(aarch64_simd_register(), {aarch64_gpr_register(), integer()}) -> binary(). -ldr_d(Dt, {Rn, Offset}) when - is_atom(Dt), - is_atom(Rn), - is_integer(Offset), - Offset >= 0, - Offset =< 32760, - (Offset rem 8) =:= 0 --> - DtNum = simd_reg_to_num(Dt), - RnNum = reg_to_num(Rn), - << - (16#FD400000 bor ((Offset div 8) bsl 10) bor (RnNum bsl 5) bor DtNum):32/little - >>. 
- %% Emit a load register (LDR) instruction for 32-bit load from memory (AArch64 encoding) %% Dst is destination register atom, Src is {BaseReg, Offset} tuple -spec ldr_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary(). @@ -731,17 +713,6 @@ reg_to_num(sp) -> 31; %% Zero register (XZR) is also r31 reg_to_num(xzr) -> 31. -simd_reg_to_num(v0) -> 0; -simd_reg_to_num(v1) -> 1; -simd_reg_to_num(v2) -> 2; -simd_reg_to_num(v3) -> 3; -simd_reg_to_num(v4) -> 4; -simd_reg_to_num(v5) -> 5; -simd_reg_to_num(v6) -> 6; -simd_reg_to_num(v7) -> 7; -simd_reg_to_num(v30) -> 30; -simd_reg_to_num(v31) -> 31. - %% Emit a conditional branch instruction -spec bcc(cc(), integer()) -> binary(). bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 08fd4abca..fffe7a1ab 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -165,16 +165,6 @@ str_w_test_() -> ) ]. -ldr_d_test_() -> - [ - ?_assertEqual( - asm(<<16#fd40001e:32/little>>, "ldr d30, [x0]"), jit_aarch64_asm:ldr_d(v30, {r0, 0}) - ), - ?_assertEqual( - asm(<<16#fd400420:32/little>>, "ldr d0, [x1, #8]"), jit_aarch64_asm:ldr_d(v0, {r1, 8}) - ) - ]. 
- mov_test_() -> [ % mov immediate - simple cases diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 6005a4952..3c7438f14 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -1337,10 +1337,18 @@ move_to_vm_register_test_() -> end), %% Test: ptr with offset to fp_reg (term_to_float) ?_test(begin - move_to_vm_register_test0(State0, {free, {ptr, r9, 1}}, {fp_reg, 3}, << - " 0: f9400127 ldr x7, [x9]\n" - " 4: f9002407 str x7, [x0, #72]" - >>) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:move_to_vm_register( + State1, {free, {ptr, RegA, 1}}, {fp_reg, 3} + ), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f94004e7 ldr x7, [x7, #8]\n" + " 8: f9406008 ldr x8, [x0, #192]\n" + " c: f9000d07 str x7, [x8, #24]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) end) ] end}. @@ -1512,7 +1520,7 @@ move_to_array_element_test_() -> %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(8, State1, [r8, r9]), + State2 = setelement(7, State1, [r8, r9]), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), Stream = ?BACKEND:stream(State3), @@ -1526,7 +1534,7 @@ move_to_array_element_test_() -> %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(8, State1, [r8, r9]), + State2 = setelement(7, State1, [r8, r9]), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), Stream = ?BACKEND:stream(State3), @@ -1588,17 +1596,6 @@ move_to_native_register_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream) end), - %% move_to_native_register/2: {fp_reg, N} - ?_test(begin - {State1, Reg} = 
?BACKEND:move_to_native_register(State0, {fp_reg, 3}), - Stream = ?BACKEND:stream(State1), - ?assertEqual(v0, Reg), - Dump = << - " 0: f9406007 ldr x7, [x0, #192]\n" - " 4: fd400ce0 ldr d0, [x7, #24]" - >>, - ?assertEqual(dump_to_bin(Dump), Stream) - end), %% move_to_native_register/3: imm to reg ?_test(begin State1 = ?BACKEND:move_to_native_register(State0, 42, r8), From bc86c88038623f972fb74c4e659fc31d13f91d35 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 14:57:12 +0200 Subject: [PATCH 43/46] AArch64: fix tests after primitive renumbering Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_tests.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 3c7438f14..daa6bcad6 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -1095,7 +1095,7 @@ call_fun_test() -> " 30: 540000c0 b.eq 0x48 // b.none\n" " 34: f9404c47 ldr x7, [x2, #152]\n" " 38: d2800702 mov x2, #0x38 // #56\n" - " 3c: d2804163 mov x3, #0x20b // #523\n" + " 3c: d2803163 mov x3, #0x18b // #395\n" " 40: aa0803e4 mov x4, x8\n" " 44: d61f00e0 br x7\n" " 48: 927ef508 and x8, x8, #0xfffffffffffffffc\n" @@ -1105,7 +1105,7 @@ call_fun_test() -> " 58: 540000c0 b.eq 0x70 // b.none\n" " 5c: f9404c47 ldr x7, [x2, #152]\n" " 60: d2800c02 mov x2, #0x60 // #96\n" - " 64: d2804163 mov x3, #0x20b // #523\n" + " 64: d2803163 mov x3, #0x18b // #395\n" " 68: aa0803e4 mov x4, x8\n" " 6c: d61f00e0 br x7\n" " 70: f9400028 ldr x8, [x1]\n" From 593be361a941b820f4e0fd656dd368f6c8898d0b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 17 Sep 2025 21:22:27 +0200 Subject: [PATCH 44/46] AArch64: use binutils helper for asm tests Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_asm_tests.erl | 552 +++++++++++------------ 1 file changed, 259 insertions(+), 293 deletions(-) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl 
b/tests/libs/jit/jit_aarch64_asm_tests.erl index fffe7a1ab..67e009e8a 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -24,209 +24,244 @@ -include_lib("eunit/include/eunit.hrl"). -endif. +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(aarch64, Bin, Str), Value) +). + add_test_() -> [ - ?_assertEqual( - asm(<<16#9100e0e7:32/little>>, "add x7, x7, #56"), jit_aarch64_asm:add(r7, r7, 56) + ?_assertAsmEqual( + <<16#9100e0e7:32/little>>, "add x7, x7, #56", jit_aarch64_asm:add(r7, r7, 56) ), - ?_assertEqual( - asm(<<16#91000000:32/little>>, "add x0, x0, #0"), jit_aarch64_asm:add(r0, r0, 0) + ?_assertAsmEqual( + <<16#91000000:32/little>>, "add x0, x0, #0", jit_aarch64_asm:add(r0, r0, 0) ), - ?_assertEqual( - asm(<<16#91000421:32/little>>, "add x1, x1, #1"), jit_aarch64_asm:add(r1, r1, 1) + ?_assertAsmEqual( + <<16#91000421:32/little>>, "add x1, x1, #1", jit_aarch64_asm:add(r1, r1, 1) ), - ?_assertEqual( - asm(<<16#8b031041:32/little>>, "add x1, x2, x3, lsl #4"), + ?_assertAsmEqual( + <<16#8b031041:32/little>>, + "add x1, x2, x3, lsl #4", jit_aarch64_asm:add(r1, r2, r3, {lsl, 4}) ), - ?_assertEqual( - asm(<<16#8b030041:32/little>>, "add x1, x2, x3"), jit_aarch64_asm:add(r1, r2, r3) + ?_assertAsmEqual( + <<16#8b030041:32/little>>, "add x1, x2, x3", jit_aarch64_asm:add(r1, r2, r3) ) ]. 
sub_test_() -> [ - ?_assertEqual( - asm(<<16#d100e0e7:32/little>>, "sub x7, x7, #56"), jit_aarch64_asm:sub(r7, r7, 56) + ?_assertAsmEqual( + <<16#d100e0e7:32/little>>, "sub x7, x7, #56", jit_aarch64_asm:sub(r7, r7, 56) ), - ?_assertEqual( - asm(<<16#d1000000:32/little>>, "sub x0, x0, #0"), jit_aarch64_asm:sub(r0, r0, 0) + ?_assertAsmEqual( + <<16#d1000000:32/little>>, "sub x0, x0, #0", jit_aarch64_asm:sub(r0, r0, 0) ), - ?_assertEqual( - asm(<<16#d1000421:32/little>>, "sub x1, x1, #1"), jit_aarch64_asm:sub(r1, r1, 1) + ?_assertAsmEqual( + <<16#d1000421:32/little>>, "sub x1, x1, #1", jit_aarch64_asm:sub(r1, r1, 1) ), - ?_assertEqual( - asm(<<16#cb031041:32/little>>, "sub x1, x2, x3, lsl #4"), + ?_assertAsmEqual( + <<16#cb031041:32/little>>, + "sub x1, x2, x3, lsl #4", jit_aarch64_asm:sub(r1, r2, r3, {lsl, 4}) ), - ?_assertEqual( - asm(<<16#cb030041:32/little>>, "sub x1, x2, x3"), jit_aarch64_asm:sub(r1, r2, r3) + ?_assertAsmEqual( + <<16#cb030041:32/little>>, "sub x1, x2, x3", jit_aarch64_asm:sub(r1, r2, r3) ) ]. madd_test_() -> [ - ?_assertEqual( - asm(<<16#9b037c41:32/little>>, "mul x1, x2, x3"), jit_aarch64_asm:mul(r1, r2, r3) + ?_assertAsmEqual( + <<16#9b037c41:32/little>>, "mul x1, x2, x3", jit_aarch64_asm:mul(r1, r2, r3) ), - ?_assertEqual( - asm(<<16#9b031041:32/little>>, "madd x1, x2, x3, x4"), - jit_aarch64_asm:madd(r1, r2, r3, r4) + ?_assertAsmEqual( + <<16#9b031041:32/little>>, "madd x1, x2, x3, x4", jit_aarch64_asm:madd(r1, r2, r3, r4) ) ]. 
b_test_() -> [ - ?_assertEqual(<<16#14000000:32/little>>, jit_aarch64_asm:b(0)), - ?_assertEqual(<<16#14000004:32/little>>, jit_aarch64_asm:b(16)), - ?_assertEqual(<<16#17fffff0:32/little>>, jit_aarch64_asm:b(-64)), - ?_assertEqual(<<16#14000001:32/little>>, jit_aarch64_asm:b(4)) + ?_assertAsmEqual(<<16#14000000:32/little>>, "b .+0", jit_aarch64_asm:b(0)), + ?_assertAsmEqual(<<16#14000004:32/little>>, "b .+16", jit_aarch64_asm:b(16)), + ?_assertAsmEqual(<<16#17fffff0:32/little>>, "b .-64", jit_aarch64_asm:b(-64)), + ?_assertAsmEqual(<<16#14000001:32/little>>, "b .+4", jit_aarch64_asm:b(4)) ]. brk_test_() -> [ - ?_assertEqual(<<16#D4200000:32/little>>, jit_aarch64_asm:brk(0)), - ?_assertEqual(<<16#D4201900:32/little>>, jit_aarch64_asm:brk(200)) + ?_assertAsmEqual(<<16#D4200000:32/little>>, "brk #0", jit_aarch64_asm:brk(0)), + ?_assertAsmEqual(<<16#D4201900:32/little>>, "brk #200", jit_aarch64_asm:brk(200)) ]. blr_test_() -> [ - ?_assertEqual(<<16#D63F0000:32/little>>, jit_aarch64_asm:blr(r0)), - ?_assertEqual(<<16#D63F0020:32/little>>, jit_aarch64_asm:blr(r1)), - ?_assertEqual(<<16#D63F01A0:32/little>>, jit_aarch64_asm:blr(r13)) + ?_assertAsmEqual(<<16#D63F0000:32/little>>, "blr x0", jit_aarch64_asm:blr(r0)), + ?_assertAsmEqual(<<16#D63F0020:32/little>>, "blr x1", jit_aarch64_asm:blr(r1)), + ?_assertAsmEqual(<<16#D63F01A0:32/little>>, "blr x13", jit_aarch64_asm:blr(r13)) ]. br_test_() -> [ - ?_assertEqual(<<16#D61F0000:32/little>>, jit_aarch64_asm:br(r0)), - ?_assertEqual(<<16#D61F0020:32/little>>, jit_aarch64_asm:br(r1)), - ?_assertEqual(<<16#D61F01A0:32/little>>, jit_aarch64_asm:br(r13)) + ?_assertAsmEqual(<<16#D61F0000:32/little>>, "br x0", jit_aarch64_asm:br(r0)), + ?_assertAsmEqual(<<16#D61F0020:32/little>>, "br x1", jit_aarch64_asm:br(r1)), + ?_assertAsmEqual(<<16#D61F01A0:32/little>>, "br x13", jit_aarch64_asm:br(r13)) ]. 
ldr_test_() -> [ - ?_assertEqual(<<16#F9400421:32/little>>, jit_aarch64_asm:ldr(r1, {r1, 8})), - ?_assertEqual(<<16#F9403042:32/little>>, jit_aarch64_asm:ldr(r2, {r2, 96})), + ?_assertAsmEqual( + <<16#F9400421:32/little>>, "ldr x1, [x1, #8]", jit_aarch64_asm:ldr(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F9403042:32/little>>, "ldr x2, [x2, #96]", jit_aarch64_asm:ldr(r2, {r2, 96}) + ), % Load-update (writeback) with SP, negative offset - ?_assertEqual( + ?_assertAsmEqual( <<16#F85F0FE7:32/little>>, + "ldr x7, [sp, #-16]!", jit_aarch64_asm:ldr(r7, {sp, -16}, '!') ), % Load-update (writeback) with SP, positive offset - ?_assertEqual( - <<16#F8410FE7:32/little>>, - jit_aarch64_asm:ldr(r7, {sp, 16}, '!') + ?_assertAsmEqual( + <<16#F8410FE7:32/little>>, "ldr x7, [sp, #16]!", jit_aarch64_asm:ldr(r7, {sp, 16}, '!') ), % Load-update (writeback) with SP, zero offset - ?_assertEqual( - <<16#F84007E7:32/little>>, - jit_aarch64_asm:ldr(r7, {sp}, 0) + ?_assertAsmEqual( + <<16#F84007E7:32/little>>, "ldr x7, [sp], #0", jit_aarch64_asm:ldr(r7, {sp}, 0) ), % shift - ?_assertEqual( + ?_assertAsmEqual( <<16#f8637841:32/little>>, + "ldr x1, [x2, x3, lsl #3]", jit_aarch64_asm:ldr(r1, {r2, r3, lsl, 3}) ), - ?_assertEqual( + ?_assertAsmEqual( <<16#f8677907:32/little>>, + "ldr x7, [x8, x7, lsl #3]", jit_aarch64_asm:ldr(r7, {r8, r7, lsl, 3}) ), - ?_assertEqual( - <<16#f8636841:32/little>>, - jit_aarch64_asm:ldr(r1, {r2, r3}) + ?_assertAsmEqual( + <<16#f8636841:32/little>>, "ldr x1, [x2, x3]", jit_aarch64_asm:ldr(r1, {r2, r3}) ) ]. 
ldr_w_test_() -> [ - ?_assertEqual( - asm(<<16#b9400821:32/little>>, "ldr w1, [x1, 8]"), jit_aarch64_asm:ldr_w(r1, {r1, 8}) + ?_assertAsmEqual( + <<16#b9400821:32/little>>, "ldr w1, [x1, 8]", jit_aarch64_asm:ldr_w(r1, {r1, 8}) ), - ?_assertEqual( - asm(<<16#b9406042:32/little>>, "ldr w2, [x2, 96]"), jit_aarch64_asm:ldr_w(r2, {r2, 96}) + ?_assertAsmEqual( + <<16#b9406042:32/little>>, "ldr w2, [x2, 96]", jit_aarch64_asm:ldr_w(r2, {r2, 96}) ), - ?_assertEqual( - asm(<<16#b97ffc60:32/little>>, "ldr w0, [x3, 16380]"), - jit_aarch64_asm:ldr_w(r0, {r3, 16380}) + ?_assertAsmEqual( + <<16#b97ffc60:32/little>>, "ldr w0, [x3, 16380]", jit_aarch64_asm:ldr_w(r0, {r3, 16380}) ) ]. str_w_test_() -> [ - ?_assertEqual( - asm(<<16#b9000821:32/little>>, "str w1, [x1, 8]"), jit_aarch64_asm:str_w(r1, {r1, 8}) + ?_assertAsmEqual( + <<16#b9000821:32/little>>, "str w1, [x1, 8]", jit_aarch64_asm:str_w(r1, {r1, 8}) ), - ?_assertEqual( - asm(<<16#b9006042:32/little>>, "str w2, [x2, 96]"), jit_aarch64_asm:str_w(r2, {r2, 96}) + ?_assertAsmEqual( + <<16#b9006042:32/little>>, "str w2, [x2, 96]", jit_aarch64_asm:str_w(r2, {r2, 96}) ), - ?_assertEqual( - asm(<<16#b93ffc60:32/little>>, "str w0, [x3, 16380]"), - jit_aarch64_asm:str_w(r0, {r3, 16380}) + ?_assertAsmEqual( + <<16#b93ffc60:32/little>>, "str w0, [x3, 16380]", jit_aarch64_asm:str_w(r0, {r3, 16380}) ) ]. 
mov_test_() -> [ % mov immediate - simple cases - ?_assertEqual(<<16#D2800000:32/little>>, jit_aarch64_asm:mov(r0, 0)), - ?_assertEqual(<<16#D2801901:32/little>>, jit_aarch64_asm:mov(r1, 200)), - ?_assertEqual(<<16#d28000b3:32/little>>, jit_aarch64_asm:mov(r19, 5)), - ?_assertEqual(<<16#92800094:32/little>>, jit_aarch64_asm:mov(r20, -5)), - ?_assertEqual(<<16#d2800015:32/little>>, jit_aarch64_asm:mov(r21, 0)), - ?_assertEqual(<<16#d29ffff0:32/little>>, jit_aarch64_asm:mov(r16, 16#FFFF)), - ?_assertEqual(<<16#929fffcf:32/little>>, jit_aarch64_asm:mov(r15, -16#FFFF)), + ?_assertAsmEqual(<<16#D2800000:32/little>>, "mov x0, #0", jit_aarch64_asm:mov(r0, 0)), + ?_assertAsmEqual(<<16#D2801901:32/little>>, "mov x1, #200", jit_aarch64_asm:mov(r1, 200)), + ?_assertAsmEqual(<<16#d28000b3:32/little>>, "mov x19, #5", jit_aarch64_asm:mov(r19, 5)), + ?_assertAsmEqual(<<16#92800094:32/little>>, "mov x20, #-5", jit_aarch64_asm:mov(r20, -5)), + ?_assertAsmEqual(<<16#d2800015:32/little>>, "mov x21, #0", jit_aarch64_asm:mov(r21, 0)), + ?_assertAsmEqual( + <<16#d29ffff0:32/little>>, "mov x16, #0xffff", jit_aarch64_asm:mov(r16, 16#FFFF) + ), + ?_assertAsmEqual( + <<16#929fffcf:32/little>>, "mov x15, #-0xffff", jit_aarch64_asm:mov(r15, -16#FFFF) + ), % mov immediate - complex cases requiring multiple instructions - ?_assertEqual(<<16#d2a00052:32/little>>, jit_aarch64_asm:mov(r18, 16#20000)), - ?_assertEqual(<<16#b26fbbf1:32/little>>, jit_aarch64_asm:mov(r17, -131072)), + ?_assertAsmEqual( + <<16#d2a00052:32/little>>, "mov x18, #0x20000", jit_aarch64_asm:mov(r18, 16#20000) + ), + ?_assertAsmEqual( + <<16#b26fbbf1:32/little>>, "mov x17, #-0x20000", jit_aarch64_asm:mov(r17, -131072) + ), % mov immediate - very large value requiring multiple instructions - ?_assertEqual( + ?_assertAsmEqual( <<16#D29579A1:32/little, 16#F2B7C041:32/little, 16#F2DFD741:32/little, 16#F2EFF941:32/little>>, + "mov x1, #0xabcd\n" + "movk x1, #0xbe02, lsl #16\n" + "movk x1, #0xfeba, lsl #32\n" + "movk x1, #0x7fca, 
lsl #48", jit_aarch64_asm:mov(r1, 9208452466117618637) ), % mov register - ?_assertEqual(<<16#AA0103E0:32/little>>, jit_aarch64_asm:mov(r0, r1)), - ?_assertEqual(<<16#AA0703E1:32/little>>, jit_aarch64_asm:mov(r1, r7)) + ?_assertAsmEqual(<<16#AA0103E0:32/little>>, "mov x0, x1", jit_aarch64_asm:mov(r0, r1)), + ?_assertAsmEqual(<<16#AA0703E1:32/little>>, "mov x1, x7", jit_aarch64_asm:mov(r1, r7)) ]. orr_test_() -> [ % ORR Rd, XZR, Rm (MOV) - ?_assertEqual(<<16#AA0103E0:32/little>>, jit_aarch64_asm:orr(r0, xzr, r1)), + ?_assertAsmEqual( + <<16#AA0103E0:32/little>>, "orr x0, xzr, x1", jit_aarch64_asm:orr(r0, xzr, r1) + ), % ORR Rd, Rn, Rm - ?_assertEqual(<<16#AA010020:32/little>>, jit_aarch64_asm:orr(r0, r1, r1)), - ?_assertEqual(<<16#AA020041:32/little>>, jit_aarch64_asm:orr(r1, r2, r2)) + ?_assertAsmEqual( + <<16#AA010020:32/little>>, "orr x0, x1, x1", jit_aarch64_asm:orr(r0, r1, r1) + ), + ?_assertAsmEqual( + <<16#AA020041:32/little>>, "orr x1, x2, x2", jit_aarch64_asm:orr(r1, r2, r2) + ) ]. 
str_test_() -> [ - ?_assertEqual(<<16#F9000421:32/little>>, jit_aarch64_asm:str(r1, {r1, 8})), - ?_assertEqual(<<16#F9003042:32/little>>, jit_aarch64_asm:str(r2, {r2, 96})), + ?_assertAsmEqual( + <<16#F9000421:32/little>>, "str x1, [x1, #8]", jit_aarch64_asm:str(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F9003042:32/little>>, "str x2, [x2, #96]", jit_aarch64_asm:str(r2, {r2, 96}) + ), % str with xzr (zero register) - stores zero to memory - ?_assertEqual(<<16#F900001F:32/little>>, jit_aarch64_asm:str(xzr, {r0, 0})), - ?_assertEqual(<<16#F900043F:32/little>>, jit_aarch64_asm:str(xzr, {r1, 8})), - ?_assertEqual(<<16#F900085F:32/little>>, jit_aarch64_asm:str(xzr, {r2, 16})), + ?_assertAsmEqual( + <<16#F900001F:32/little>>, "str xzr, [x0]", jit_aarch64_asm:str(xzr, {r0, 0}) + ), + ?_assertAsmEqual( + <<16#F900043F:32/little>>, "str xzr, [x1, #8]", jit_aarch64_asm:str(xzr, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F900085F:32/little>>, "str xzr, [x2, #16]", jit_aarch64_asm:str(xzr, {r2, 16}) + ), % Store-update (writeback) with SP - ?_assertEqual( + ?_assertAsmEqual( <<16#F81F0FE7:32/little>>, + "str x7, [sp, #-16]!", jit_aarch64_asm:str(r7, {sp, -16}, '!') ), % Store-update (writeback) with SP, positive offset - ?_assertEqual( - <<16#F8010FE7:32/little>>, - jit_aarch64_asm:str(r7, {sp, 16}, '!') + ?_assertAsmEqual( + <<16#F8010FE7:32/little>>, "str x7, [sp, #16]!", jit_aarch64_asm:str(r7, {sp, 16}, '!') ), % Store-update (writeback) with SP, zero offset - ?_assertEqual( - <<16#F80007E7:32/little>>, - jit_aarch64_asm:str(r7, {sp}, 0) + ?_assertAsmEqual( + <<16#F80007E7:32/little>>, "str x7, [sp], #0", jit_aarch64_asm:str(r7, {sp}, 0) ), % shift - ?_assertEqual( + ?_assertAsmEqual( <<16#f8237841:32/little>>, + "str x1, [x2, x3, lsl #3]", jit_aarch64_asm:str(r1, {r2, r3, lsl, 3}) ) ]. 
@@ -234,230 +269,200 @@ str_test_() -> cmp_test_() -> [ % cmp reg, reg - ?_assertEqual(<<16#EB01001F:32/little>>, jit_aarch64_asm:cmp(r0, r1)), + ?_assertAsmEqual(<<16#EB01001F:32/little>>, "cmp x0, x1", jit_aarch64_asm:cmp(r0, r1)), % cmp reg, imm - ?_assertEqual(<<16#F100001F:32/little>>, jit_aarch64_asm:cmp(r0, 0)), - ?_assertEqual(<<16#F103001F:32/little>>, jit_aarch64_asm:cmp(r0, 192)) + ?_assertAsmEqual(<<16#F100001F:32/little>>, "cmp x0, #0", jit_aarch64_asm:cmp(r0, 0)), + ?_assertAsmEqual(<<16#F103001F:32/little>>, "cmp x0, #192", jit_aarch64_asm:cmp(r0, 192)) ]. cmp_w_test_() -> [ % cmp_w reg, imm - ?_assertEqual(<<16#7100001F:32/little>>, jit_aarch64_asm:cmp_w(r0, 0)), - ?_assertEqual(<<16#7103001F:32/little>>, jit_aarch64_asm:cmp_w(r0, 192)) + ?_assertAsmEqual(<<16#7100001F:32/little>>, "cmp w0, #0", jit_aarch64_asm:cmp_w(r0, 0)), + ?_assertAsmEqual(<<16#7103001F:32/little>>, "cmp w0, #192", jit_aarch64_asm:cmp_w(r0, 192)) ]. and_test_() -> [ % AND reg, reg, reg - ?_assertEqual(<<16#8A010020:32/little>>, jit_aarch64_asm:and_(r0, r1, r1)), + ?_assertAsmEqual( + <<16#8A010020:32/little>>, "and x0, x1, x1", jit_aarch64_asm:and_(r0, r1, r1) + ), % AND reg, reg, imm - ?_assertEqual(<<16#927A0420:32/little>>, jit_aarch64_asm:and_(r0, r1, 192)), - ?_assertEqual(<<16#927ff8e7:32/little>>, jit_aarch64_asm:and_(r7, r7, -2)), - ?_assertEqual(<<16#9200cc41:32/little>>, jit_aarch64_asm:and_(r1, r2, 16#f0f0f0f0f0f0f0f)), - ?_assertEqual(<<16#92603c62:32/little>>, jit_aarch64_asm:and_(r2, r3, 16#ffff00000000)), - ?_assertEqual(<<16#92785c83:32/little>>, jit_aarch64_asm:and_(r3, r4, 16#ffffff00)) + ?_assertAsmEqual( + <<16#927A0420:32/little>>, "and x0, x1, #0xc0", jit_aarch64_asm:and_(r0, r1, 192) + ), + ?_assertAsmEqual( + <<16#927ff8e7:32/little>>, + "and x7, x7, #0xfffffffffffffffe", + jit_aarch64_asm:and_(r7, r7, -2) + ), + ?_assertAsmEqual( + <<16#9200cc41:32/little>>, + "and x1, x2, #0xf0f0f0f0f0f0f0f", + jit_aarch64_asm:and_(r1, r2, 16#f0f0f0f0f0f0f0f) + ), + 
?_assertAsmEqual( + <<16#92603c62:32/little>>, + "and x2, x3, #0xffff00000000", + jit_aarch64_asm:and_(r2, r3, 16#ffff00000000) + ), + ?_assertAsmEqual( + <<16#92785c83:32/little>>, + "and x3, x4, #0xffffff00", + jit_aarch64_asm:and_(r3, r4, 16#ffffff00) + ) ]. lsl_test_() -> [ - ?_assertEqual(<<16#D3607C00:32/little>>, jit_aarch64_asm:lsl(r0, r0, 32)) + ?_assertAsmEqual( + <<16#D3607C00:32/little>>, "lsl x0, x0, #32", jit_aarch64_asm:lsl(r0, r0, 32) + ) ]. lsr_test_() -> [ - ?_assertEqual( - asm(<<16#D340FC00:32/little>>, "lsr x0, x0, 0"), jit_aarch64_asm:lsr(r0, r0, 0) + ?_assertAsmEqual( + <<16#D340FC00:32/little>>, "lsr x0, x0, 0", jit_aarch64_asm:lsr(r0, r0, 0) ), - ?_assertEqual( - asm(<<16#D340FC01:32/little>>, "lsr x1, x0, 0"), jit_aarch64_asm:lsr(r1, r0, 0) + ?_assertAsmEqual( + <<16#D340FC01:32/little>>, "lsr x1, x0, 0", jit_aarch64_asm:lsr(r1, r0, 0) ), - ?_assertEqual(<<16#D360FC00:32/little>>, jit_aarch64_asm:lsr(r0, r0, 32)) + ?_assertAsmEqual( + <<16#D360FC00:32/little>>, "lsr x0, x0, #32", jit_aarch64_asm:lsr(r0, r0, 32) + ) ]. ret_test_() -> [ - ?_assertEqual(asm(<<16#D65F03C0:32/little>>, "ret"), jit_aarch64_asm:ret()) + ?_assertAsmEqual(<<16#D65F03C0:32/little>>, "ret", jit_aarch64_asm:ret()) ]. 
tst_test_() -> [ - ?_assertEqual(asm(<<16#EA01001F:32/little>>, "tst x0, x1"), jit_aarch64_asm:tst(r0, r1)), - ?_assertEqual(asm(<<16#f240003f:32/little>>, "tst x1, #1"), jit_aarch64_asm:tst(r1, 1)), - ?_assertEqual(asm(<<16#f27c005f:32/little>>, "tst x2, #16"), jit_aarch64_asm:tst(r2, 16)), - ?_assertEqual(asm(<<16#f2401c7f:32/little>>, "tst x3, #255"), jit_aarch64_asm:tst(r3, 255)), - ?_assertEqual( - asm(<<16#f240249f:32/little>>, "tst x4, #1023"), jit_aarch64_asm:tst(r4, 1023) - ), - ?_assertEqual(asm(<<16#f24014bf:32/little>>, "tst x5, #63"), jit_aarch64_asm:tst(r5, 63)), - ?_assertEqual(asm(<<16#f27b00df:32/little>>, "tst x6, #32"), jit_aarch64_asm:tst(r6, 32)), - ?_assertEqual(asm(<<16#f27a00ff:32/little>>, "tst x7, #64"), jit_aarch64_asm:tst(r7, 64)), - ?_assertEqual(asm(<<16#f27e051f:32/little>>, "tst x8, #0xc"), jit_aarch64_asm:tst(r8, 16#c)) + ?_assertAsmEqual(<<16#EA01001F:32/little>>, "tst x0, x1", jit_aarch64_asm:tst(r0, r1)), + ?_assertAsmEqual(<<16#f240003f:32/little>>, "tst x1, #1", jit_aarch64_asm:tst(r1, 1)), + ?_assertAsmEqual(<<16#f27c005f:32/little>>, "tst x2, #16", jit_aarch64_asm:tst(r2, 16)), + ?_assertAsmEqual(<<16#f2401c7f:32/little>>, "tst x3, #255", jit_aarch64_asm:tst(r3, 255)), + ?_assertAsmEqual(<<16#f240249f:32/little>>, "tst x4, #1023", jit_aarch64_asm:tst(r4, 1023)), + ?_assertAsmEqual(<<16#f24014bf:32/little>>, "tst x5, #63", jit_aarch64_asm:tst(r5, 63)), + ?_assertAsmEqual(<<16#f27b00df:32/little>>, "tst x6, #32", jit_aarch64_asm:tst(r6, 32)), + ?_assertAsmEqual(<<16#f27a00ff:32/little>>, "tst x7, #64", jit_aarch64_asm:tst(r7, 64)), + ?_assertAsmEqual(<<16#f27e051f:32/little>>, "tst x8, #0xc", jit_aarch64_asm:tst(r8, 16#c)) ]. 
tst_w_test_() -> [ - ?_assertEqual(asm(<<16#6a01001f:32/little>>, "tst w0, w1"), jit_aarch64_asm:tst_w(r0, r1)), - ?_assertEqual(asm(<<16#7200003f:32/little>>, "tst w1, #1"), jit_aarch64_asm:tst_w(r1, 1)), - ?_assertEqual(asm(<<16#721c005f:32/little>>, "tst w2, #16"), jit_aarch64_asm:tst_w(r2, 16)), - ?_assertEqual( - asm(<<16#72001c7f:32/little>>, "tst w3, #255"), jit_aarch64_asm:tst_w(r3, 255) - ), - ?_assertEqual( - asm(<<16#7200249f:32/little>>, "tst w4, #1023"), jit_aarch64_asm:tst_w(r4, 1023) - ), - ?_assertEqual(asm(<<16#720014bf:32/little>>, "tst w5, #63"), jit_aarch64_asm:tst_w(r5, 63)), - ?_assertEqual(asm(<<16#721b00df:32/little>>, "tst w6, #32"), jit_aarch64_asm:tst_w(r6, 32)), - ?_assertEqual(asm(<<16#721a00ff:32/little>>, "tst w7, #64"), jit_aarch64_asm:tst_w(r7, 64)), - ?_assertEqual( - asm(<<16#721e051f:32/little>>, "tst w8, #0xc"), jit_aarch64_asm:tst_w(r8, 16#c) - ) + ?_assertAsmEqual(<<16#6a01001f:32/little>>, "tst w0, w1", jit_aarch64_asm:tst_w(r0, r1)), + ?_assertAsmEqual(<<16#7200003f:32/little>>, "tst w1, #1", jit_aarch64_asm:tst_w(r1, 1)), + ?_assertAsmEqual(<<16#721c005f:32/little>>, "tst w2, #16", jit_aarch64_asm:tst_w(r2, 16)), + ?_assertAsmEqual(<<16#72001c7f:32/little>>, "tst w3, #255", jit_aarch64_asm:tst_w(r3, 255)), + ?_assertAsmEqual( + <<16#7200249f:32/little>>, "tst w4, #1023", jit_aarch64_asm:tst_w(r4, 1023) + ), + ?_assertAsmEqual(<<16#720014bf:32/little>>, "tst w5, #63", jit_aarch64_asm:tst_w(r5, 63)), + ?_assertAsmEqual(<<16#721b00df:32/little>>, "tst w6, #32", jit_aarch64_asm:tst_w(r6, 32)), + ?_assertAsmEqual(<<16#721a00ff:32/little>>, "tst w7, #64", jit_aarch64_asm:tst_w(r7, 64)), + ?_assertAsmEqual(<<16#721e051f:32/little>>, "tst w8, #0xc", jit_aarch64_asm:tst_w(r8, 16#c)) ]. 
bcc_test_() -> [ - ?_assertEqual(<<16#54000000:32/little>>, jit_aarch64_asm:bcc(eq, 0)), - ?_assertEqual(<<16#54000001:32/little>>, jit_aarch64_asm:bcc(ne, 0)), - ?_assertEqual(<<16#54fffe01:32/little>>, jit_aarch64_asm:bcc(ne, -64)), - ?_assertEqual( - asm(<<16#54000400:32/little>>, "b.eq 128"), - jit_aarch64_asm:bcc(eq, 128) - ), - ?_assertEqual( - asm(<<16#54000402:32/little>>, "b.cs 128"), - jit_aarch64_asm:bcc(cs, 128) - ), - ?_assertEqual( - asm(<<16#54000403:32/little>>, "b.cc 128"), - jit_aarch64_asm:bcc(cc, 128) - ), - ?_assertEqual( - asm(<<16#54000404:32/little>>, "b.mi 128"), - jit_aarch64_asm:bcc(mi, 128) - ), - ?_assertEqual( - asm(<<16#54000405:32/little>>, "b.pl 128"), - jit_aarch64_asm:bcc(pl, 128) - ), - ?_assertEqual( - asm(<<16#54000406:32/little>>, "b.vs 128"), - jit_aarch64_asm:bcc(vs, 128) - ), - ?_assertEqual( - asm(<<16#54000408:32/little>>, "b.hi 128"), - jit_aarch64_asm:bcc(hi, 128) - ), - ?_assertEqual( - asm(<<16#54000409:32/little>>, "b.ls 128"), - jit_aarch64_asm:bcc(ls, 128) - ), - ?_assertEqual( - asm(<<16#5400040a:32/little>>, "b.ge 128"), - jit_aarch64_asm:bcc(ge, 128) - ), - ?_assertEqual( - asm(<<16#5400040b:32/little>>, "b.lt 128"), - jit_aarch64_asm:bcc(lt, 128) - ), - ?_assertEqual( - asm(<<16#5400040c:32/little>>, "b.gt 128"), - jit_aarch64_asm:bcc(gt, 128) - ), - ?_assertEqual( - asm(<<16#5400040d:32/little>>, "b.le 128"), - jit_aarch64_asm:bcc(le, 128) - ), - ?_assertEqual( - asm(<<16#5400040e:32/little>>, "b.al 128"), - jit_aarch64_asm:bcc(al, 128) - ), - ?_assertEqual( - asm(<<16#5400040f:32/little>>, "b.nv 128"), - jit_aarch64_asm:bcc(nv, 128) - ) + ?_assertAsmEqual(<<16#54000000:32/little>>, "b.eq .+0", jit_aarch64_asm:bcc(eq, 0)), + ?_assertAsmEqual(<<16#54000001:32/little>>, "b.ne .+0", jit_aarch64_asm:bcc(ne, 0)), + ?_assertAsmEqual(<<16#54fffe01:32/little>>, "b.ne .-64", jit_aarch64_asm:bcc(ne, -64)), + ?_assertAsmEqual(<<16#54000400:32/little>>, "b.eq 128", jit_aarch64_asm:bcc(eq, 128)), + 
?_assertAsmEqual(<<16#54000402:32/little>>, "b.cs 128", jit_aarch64_asm:bcc(cs, 128)), + ?_assertAsmEqual(<<16#54000403:32/little>>, "b.cc 128", jit_aarch64_asm:bcc(cc, 128)), + ?_assertAsmEqual(<<16#54000404:32/little>>, "b.mi 128", jit_aarch64_asm:bcc(mi, 128)), + ?_assertAsmEqual(<<16#54000405:32/little>>, "b.pl 128", jit_aarch64_asm:bcc(pl, 128)), + ?_assertAsmEqual(<<16#54000406:32/little>>, "b.vs 128", jit_aarch64_asm:bcc(vs, 128)), + ?_assertAsmEqual(<<16#54000408:32/little>>, "b.hi 128", jit_aarch64_asm:bcc(hi, 128)), + ?_assertAsmEqual(<<16#54000409:32/little>>, "b.ls 128", jit_aarch64_asm:bcc(ls, 128)), + ?_assertAsmEqual(<<16#5400040a:32/little>>, "b.ge 128", jit_aarch64_asm:bcc(ge, 128)), + ?_assertAsmEqual(<<16#5400040b:32/little>>, "b.lt 128", jit_aarch64_asm:bcc(lt, 128)), + ?_assertAsmEqual(<<16#5400040c:32/little>>, "b.gt 128", jit_aarch64_asm:bcc(gt, 128)), + ?_assertAsmEqual(<<16#5400040d:32/little>>, "b.le 128", jit_aarch64_asm:bcc(le, 128)), + ?_assertAsmEqual(<<16#5400040e:32/little>>, "b.al 128", jit_aarch64_asm:bcc(al, 128)), + ?_assertAsmEqual(<<16#5400040f:32/little>>, "b.nv 128", jit_aarch64_asm:bcc(nv, 128)) ]. cbnz_test_() -> [ - ?_assertEqual( - asm(<<16#b5000401:32/little>>, "cbnz x1, 128"), - jit_aarch64_asm:cbnz(r1, 128) + ?_assertAsmEqual(<<16#b5000401:32/little>>, "cbnz x1, 128", jit_aarch64_asm:cbnz(r1, 128)), + ?_assertAsmEqual( + <<16#35000402:32/little>>, "cbnz w2, 128", jit_aarch64_asm:cbnz_w(r2, 128) ), - ?_assertEqual( - asm(<<16#35000402:32/little>>, "cbnz w2, 128"), - jit_aarch64_asm:cbnz_w(r2, 128) - ), - ?_assertEqual( - asm(<<16#b5fffc03:32/little>>, "cbnz x3, -128"), - jit_aarch64_asm:cbnz(r3, -128) - ) + ?_assertAsmEqual(<<16#b5fffc03:32/little>>, "cbnz x3, -128", jit_aarch64_asm:cbnz(r3, -128)) ]. 
tbz_test_() -> [ - ?_assertEqual( - asm(<<16#b6f80400:32/little>>, "tbz x0, #63, 128"), - jit_aarch64_asm:tbz(r0, 63, 128) + ?_assertAsmEqual( + <<16#b6f80400:32/little>>, "tbz x0, #63, 128", jit_aarch64_asm:tbz(r0, 63, 128) ), - ?_assertEqual( - asm(<<16#36180400:32/little>>, "tbz x0, #3, 128"), - jit_aarch64_asm:tbz(r0, 3, 128) + ?_assertAsmEqual( + <<16#36180400:32/little>>, "tbz x0, #3, 128", jit_aarch64_asm:tbz(r0, 3, 128) ), - ?_assertEqual( - asm(<<16#363ffc03:32/little>>, "tbz x3, #7, -128"), - jit_aarch64_asm:tbz(r3, 7, -128) + ?_assertAsmEqual( + <<16#363ffc03:32/little>>, "tbz x3, #7, -128", jit_aarch64_asm:tbz(r3, 7, -128) ) ]. tbnz_test_() -> [ - ?_assertEqual( - asm(<<16#37000400:32/little>>, "tbnz x0, #0, 128"), - jit_aarch64_asm:tbnz(r0, 0, 128) + ?_assertAsmEqual( + <<16#37000400:32/little>>, "tbnz x0, #0, 128", jit_aarch64_asm:tbnz(r0, 0, 128) ), - ?_assertEqual( - asm(<<16#37180400:32/little>>, "tbnz x0, #3, 128"), - jit_aarch64_asm:tbnz(r0, 3, 128) + ?_assertAsmEqual( + <<16#37180400:32/little>>, "tbnz x0, #3, 128", jit_aarch64_asm:tbnz(r0, 3, 128) ), - ?_assertEqual( - asm(<<16#373ffc03:32/little>>, "tbnz x3, #7, -128"), - jit_aarch64_asm:tbnz(r3, 7, -128) + ?_assertAsmEqual( + <<16#373ffc03:32/little>>, "tbnz x3, #7, -128", jit_aarch64_asm:tbnz(r3, 7, -128) ) ]. 
stp_test_() -> [ - ?_assertEqual( + ?_assertAsmEqual( <<16#a8815113:32/little>>, + "stp x19, x20, [x8], #16", jit_aarch64_asm:stp(r19, r20, {r8}, 16) ), - ?_assertEqual( + ?_assertAsmEqual( <<16#a88153f3:32/little>>, + "stp x19, x20, [sp], #16", jit_aarch64_asm:stp(r19, r20, {sp}, 16) ), % Store-update (writeback) variants - ?_assertEqual( + ?_assertAsmEqual( <<16#a9bf27e8:32/little>>, + "stp x8, x9, [sp, #-16]!", jit_aarch64_asm:stp(r8, r9, {sp, -16}, '!') ), - ?_assertEqual( + ?_assertAsmEqual( <<16#a98127e8:32/little>>, + "stp x8, x9, [sp, #16]!", jit_aarch64_asm:stp(r8, r9, {sp, 16}, '!') ), - ?_assertEqual( + ?_assertAsmEqual( <<16#a98027e8:32/little>>, + "stp x8, x9, [sp, #0]!", jit_aarch64_asm:stp(r8, r9, {sp, 0}, '!') ) ]. ldp_test_() -> [ - ?_assertEqual( + ?_assertAsmEqual( <<16#a8c15113:32/little>>, + "ldp x19, x20, [x8], #16", jit_aarch64_asm:ldp(r19, r20, {r8}, 16) ), - ?_assertEqual( + ?_assertAsmEqual( <<16#a8c153f3:32/little>>, + "ldp x19, x20, [sp], #16", jit_aarch64_asm:ldp(r19, r20, {sp}, 16) ) ]. @@ -465,76 +470,37 @@ ldp_test_() -> subs_test_() -> [ % SUBS with immediate - ?_assertEqual(<<16#F1000021:32/little>>, jit_aarch64_asm:subs(r1, r1, 0)), - ?_assertEqual(<<16#F1000421:32/little>>, jit_aarch64_asm:subs(r1, r1, 1)), + ?_assertAsmEqual( + <<16#F1000021:32/little>>, "subs x1, x1, #0", jit_aarch64_asm:subs(r1, r1, 0) + ), + ?_assertAsmEqual( + <<16#F1000421:32/little>>, "subs x1, x1, #1", jit_aarch64_asm:subs(r1, r1, 1) + ), % SUBS with register - ?_assertEqual(<<16#eb000021:32/little>>, jit_aarch64_asm:subs(r1, r1, r0)), - ?_assertEqual(<<16#eb0a0021:32/little>>, jit_aarch64_asm:subs(r1, r1, r10)) + ?_assertAsmEqual( + <<16#eb000021:32/little>>, "subs x1, x1, x0", jit_aarch64_asm:subs(r1, r1, r0) + ), + ?_assertAsmEqual( + <<16#eb0a0021:32/little>>, "subs x1, x1, x10", jit_aarch64_asm:subs(r1, r1, r10) + ) ]. 
adr_test_() -> [ %% ADR x0, #0 - ?_assertEqual(<<16#10000000:32/little>>, jit_aarch64_asm:adr(r0, 0)), + ?_assertAsmEqual(<<16#10000000:32/little>>, "adr x0, .+0", jit_aarch64_asm:adr(r0, 0)), %% ADR x1, #4 - ?_assertEqual(<<16#10000021:32/little>>, jit_aarch64_asm:adr(r1, 4)), + ?_assertAsmEqual(<<16#10000021:32/little>>, "adr x1, .+4", jit_aarch64_asm:adr(r1, 4)), %% ADR x2, #-4 - ?_assertEqual(<<16#10ffffe2:32/little>>, jit_aarch64_asm:adr(r2, -4)), + ?_assertAsmEqual(<<16#10ffffe2:32/little>>, "adr x2, .-4", jit_aarch64_asm:adr(r2, -4)), %% ADR x3, #1048572 (max positive) - ?_assertEqual(<<16#107fffe3:32/little>>, jit_aarch64_asm:adr(r3, 1048572)), + ?_assertAsmEqual( + <<16#107fffe3:32/little>>, "adr x3, .+1048572", jit_aarch64_asm:adr(r3, 1048572) + ), %% ADR x4, #-1048576 (max negative) - ?_assertEqual(<<16#10800004:32/little>>, jit_aarch64_asm:adr(r4, -1048576)), + ?_assertAsmEqual( + <<16#10800004:32/little>>, "adr x4, .-1048576", jit_aarch64_asm:adr(r4, -1048576) + ), %% ADR with offset not a multiple of 4 is valid - ?_assertEqual(<<16#70000000:32/little>>, jit_aarch64_asm:adr(r0, 3)) + ?_assertAsmEqual(<<16#70000000:32/little>>, "adr x0, .+3", jit_aarch64_asm:adr(r0, 3)) ]. - -asm(Bin, Str) -> - case erlang:system_info(machine) of - "ATOM" -> - Bin; - "BEAM" -> - case os:cmd("which aarch64-elf-as") of - [] -> - Bin; - _ -> - ok = file:write_file("test.S", Str ++ "\n"), - Dump = os:cmd( - "aarch64-elf-as -c test.S -o test.o && aarch64-elf-objdump -D test.o" - ), - DumpBin = list_to_binary(Dump), - DumpLines = binary:split(DumpBin, <<"\n">>, [global]), - AsmBin = asm_lines(DumpLines, <<>>), - if - AsmBin =:= Bin -> - ok; - true -> - io:format( - "-------------------------------------------\n" - "~s\n" - "-------------------------------------------\n", - [Dump] - ) - end, - ?assertEqual(AsmBin, Bin), - Bin - end - end. 
- -asm_lines([<<" ", Tail/binary>> | T], Acc) -> - [_Offset, HexStr0] = binary:split(Tail, <<":\t">>), - [HexStr, _] = binary:split(HexStr0, <<"\t">>), - AssembledBin = hex_to_bin(HexStr, <<>>), - asm_lines(T, <>); -asm_lines([_OtherLine | T], Acc) -> - asm_lines(T, Acc); -asm_lines([], Acc) -> - Acc. - -hex_to_bin(<<>>, Acc) -> - Acc; -hex_to_bin(HexStr, Acc) -> - [HexChunk, Rest] = binary:split(HexStr, <<" ">>), - NumBits = byte_size(HexChunk) * 4, - HexVal = binary_to_integer(HexChunk, 16), - NewAcc = <>, - hex_to_bin(Rest, NewAcc). From de5b4d0756a6929d20ade4b735e6466d17df2c6c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 19 Sep 2025 22:21:26 +0200 Subject: [PATCH 45/46] AArch64: improvement and complete coverage of asm functions Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64_asm.erl | 16 +-- tests/libs/jit/jit_aarch64_asm_tests.erl | 155 ++++++++++++++++++++++- 2 files changed, 152 insertions(+), 19 deletions(-) diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 57f56819f..c63fe3c93 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -291,22 +291,16 @@ build_negative_immediate(Dst, ImmB) -> -spec build_immediate_sequence(aarch64_gpr_register(), [integer()]) -> binary(). build_immediate_sequence(Dst, [C0, C1, C2, C3]) -> %% Find the first non-zero chunk to start with MOVZ - case find_first_nonzero_chunk([C0, C1, C2, C3]) of - {Index, Value} -> - First = movz(Dst, Value, Index * 16), - Rest = build_movk_sequence(Dst, [C0, C1, C2, C3], Index), - <>; - none -> - %% All chunks are zero - movz(Dst, 0, 0) - end. + {Index, Value} = find_first_nonzero_chunk([C0, C1, C2, C3]), + First = movz(Dst, Value, Index * 16), + Rest = build_movk_sequence(Dst, [C0, C1, C2, C3], Index), + <>. %% Find the first non-zero chunk -spec find_first_nonzero_chunk([integer()]) -> {integer(), integer()} | none. find_first_nonzero_chunk(Chunks) -> find_first_nonzero_chunk(Chunks, 0). 
-find_first_nonzero_chunk([], _) -> none; find_first_nonzero_chunk([0 | Rest], Index) -> find_first_nonzero_chunk(Rest, Index + 1); find_first_nonzero_chunk([Chunk | _], Index) -> {Index, Chunk}. @@ -469,8 +463,6 @@ find_single_run_of_ones(Pattern, Size) -> find_ones_run([], _, OnesCount, StartPos, in_ones) when OnesCount > 0 -> %% Reached end while in ones run {ok, OnesCount, StartPos}; -find_ones_run([], _, _, _, _) -> - error; find_ones_run([1 | Rest], Pos, 0, _, none) -> %% Start of ones run find_ones_run(Rest, Pos + 1, 1, Pos, in_ones); diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 67e009e8a..32b23891a 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -46,6 +46,46 @@ add_test_() -> ), ?_assertAsmEqual( <<16#8b030041:32/little>>, "add x1, x2, x3", jit_aarch64_asm:add(r1, r2, r3) + ), + %% Test add with invalid immediate + ?_assertError({unencodable_immediate, 16#FFFF}, jit_aarch64_asm:add(r0, r0, 16#FFFF)), + + %% Test cases for additional registers (r11, r12, r14, r22-r30) + ?_assertAsmEqual( + <<16#8b0b000b:32/little>>, "add x11, x0, x11", jit_aarch64_asm:add(r11, r0, r11) + ), + ?_assertAsmEqual( + <<16#8b0c000c:32/little>>, "add x12, x0, x12", jit_aarch64_asm:add(r12, r0, r12) + ), + ?_assertAsmEqual( + <<16#8b0e000e:32/little>>, "add x14, x0, x14", jit_aarch64_asm:add(r14, r0, r14) + ), + ?_assertAsmEqual( + <<16#8b160016:32/little>>, "add x22, x0, x22", jit_aarch64_asm:add(r22, r0, r22) + ), + ?_assertAsmEqual( + <<16#8b170017:32/little>>, "add x23, x0, x23", jit_aarch64_asm:add(r23, r0, r23) + ), + ?_assertAsmEqual( + <<16#8b180018:32/little>>, "add x24, x0, x24", jit_aarch64_asm:add(r24, r0, r24) + ), + ?_assertAsmEqual( + <<16#8b190019:32/little>>, "add x25, x0, x25", jit_aarch64_asm:add(r25, r0, r25) + ), + ?_assertAsmEqual( + <<16#8b1a001a:32/little>>, "add x26, x0, x26", jit_aarch64_asm:add(r26, r0, r26) + ), + ?_assertAsmEqual( + 
<<16#8b1b001b:32/little>>, "add x27, x0, x27", jit_aarch64_asm:add(r27, r0, r27) + ), + ?_assertAsmEqual( + <<16#8b1c001c:32/little>>, "add x28, x0, x28", jit_aarch64_asm:add(r28, r0, r28) + ), + ?_assertAsmEqual( + <<16#8b1d001d:32/little>>, "add x29, x0, x29", jit_aarch64_asm:add(r29, r0, r29) + ), + ?_assertAsmEqual( + <<16#8b1e001e:32/little>>, "add x30, x0, x30", jit_aarch64_asm:add(r30, r0, r30) ) ]. @@ -208,7 +248,60 @@ mov_test_() -> % mov register ?_assertAsmEqual(<<16#AA0103E0:32/little>>, "mov x0, x1", jit_aarch64_asm:mov(r0, r1)), - ?_assertAsmEqual(<<16#AA0703E1:32/little>>, "mov x1, x7", jit_aarch64_asm:mov(r1, r7)) + ?_assertAsmEqual(<<16#AA0703E1:32/little>>, "mov x1, x7", jit_aarch64_asm:mov(r1, r7)), + + %% Test mov with zero immediate (should use movz with 0) + ?_assertAsmEqual( + <<16#d2800000:32/little>>, "movz x0, #0", jit_aarch64_asm:mov(r0, 0) + ), + + %% Test 4-bit pattern encoding + ?_test(begin + Result = jit_aarch64_asm:mov(r0, 16#FFFFFFFFFFFF0000), + ?assert(is_binary(Result)) + end), + %% Test complex immediate that will use fallback sequence + ?_test(begin + % This should be a complex immediate that can't be encoded as bitmask + % and needs fallback to build_immediate_sequence + Result = jit_aarch64_asm:mov(r0, 16#123456789ABCDEF0), + ?assert(is_binary(Result)) + end), + + %% Test negative immediate that uses build_negative_immediate fallback + ?_assertAsmEqual( + << + 16#d2842200:32/little, + 16#f2aca860:32/little, + 16#f2d530e0:32/little, + 16#f2fdb960:32/little + >>, + "mov x0, #0x2110\n" + "movk x0, #0x6543, lsl #16\n" + "movk x0, #0xa987, lsl #32\n" + "movk x0, #0xedcb, lsl #48", + jit_aarch64_asm:mov(r0, -16#123456789ABCDEF0) + ), + + %% Test bitmask patterns with different sizes + %% Size 16 pattern: repeats every 16 bits + ?_assertAsmEqual( + <<16#b20083e0:32/little>>, + "mov x0, #0x0001000100010001", + jit_aarch64_asm:mov(r0, 16#0001000100010001) + ), + %% Size 4 pattern: repeats every 4 bits + ?_assertAsmEqual( + 
<<16#b200e7e0:32/little>>, + "mov x0, #0x3333333333333333", + jit_aarch64_asm:mov(r0, 16#3333333333333333) + ), + %% Size 2 pattern: repeats every 2 bits + ?_assertAsmEqual( + <<16#b200f3e0:32/little>>, + "mov x0, #0x5555555555555555", + jit_aarch64_asm:mov(r0, 16#5555555555555555) + ) ]. orr_test_() -> @@ -223,7 +316,19 @@ orr_test_() -> ), ?_assertAsmEqual( <<16#AA020041:32/little>>, "orr x1, x2, x2", jit_aarch64_asm:orr(r1, r2, r2) - ) + ), + + %% Test orr with valid bitmask immediate + ?_assertAsmEqual( + <<16#b24007e0:32/little>>, "orr x0, xzr, #0x3", jit_aarch64_asm:orr(r0, xzr, 16#3) + ), + %% Test orr with another bitmask pattern + ?_assertAsmEqual( + <<16#b27f1fe0:32/little>>, "orr x0, xzr, #0x1fe", jit_aarch64_asm:orr(r0, xzr, 16#1fe) + ), + + %% Test orr with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:orr(r0, r0, 16#123456)) ]. str_test_() -> @@ -272,14 +377,29 @@ cmp_test_() -> ?_assertAsmEqual(<<16#EB01001F:32/little>>, "cmp x0, x1", jit_aarch64_asm:cmp(r0, r1)), % cmp reg, imm ?_assertAsmEqual(<<16#F100001F:32/little>>, "cmp x0, #0", jit_aarch64_asm:cmp(r0, 0)), - ?_assertAsmEqual(<<16#F103001F:32/little>>, "cmp x0, #192", jit_aarch64_asm:cmp(r0, 192)) + ?_assertAsmEqual(<<16#F103001F:32/little>>, "cmp x0, #192", jit_aarch64_asm:cmp(r0, 192)), + + %% Test large immediate compare (uses temporary register) + ?_test(begin + Result = jit_aarch64_asm:cmp(r0, 16#12345678), + ?assert(is_binary(Result)), + ?assert(byte_size(Result) > 4) + end) ]. 
cmp_w_test_() -> [ % cmp_w reg, imm ?_assertAsmEqual(<<16#7100001F:32/little>>, "cmp w0, #0", jit_aarch64_asm:cmp_w(r0, 0)), - ?_assertAsmEqual(<<16#7103001F:32/little>>, "cmp w0, #192", jit_aarch64_asm:cmp_w(r0, 192)) + ?_assertAsmEqual(<<16#7103001F:32/little>>, "cmp w0, #192", jit_aarch64_asm:cmp_w(r0, 192)), + + %% Test 32-bit compare with negative immediate + ?_assertAsmEqual( + <<16#3100041f:32/little>>, "adds wzr, w0, #1", jit_aarch64_asm:cmp_w(r0, -1) + ), + ?_assertAsmEqual( + <<16#31000c1f:32/little>>, "adds wzr, w0, #3", jit_aarch64_asm:cmp_w(r0, -3) + ) ]. and_test_() -> @@ -311,6 +431,10 @@ and_test_() -> <<16#92785c83:32/little>>, "and x3, x4, #0xffffff00", jit_aarch64_asm:and_(r3, r4, 16#ffffff00) + ), + %% Test and_ with unencodable immediate + ?_assertError( + {unencodable_immediate, 16#123456}, jit_aarch64_asm:and_(r0, r0, 16#123456) ) ]. @@ -349,7 +473,10 @@ tst_test_() -> ?_assertAsmEqual(<<16#f24014bf:32/little>>, "tst x5, #63", jit_aarch64_asm:tst(r5, 63)), ?_assertAsmEqual(<<16#f27b00df:32/little>>, "tst x6, #32", jit_aarch64_asm:tst(r6, 32)), ?_assertAsmEqual(<<16#f27a00ff:32/little>>, "tst x7, #64", jit_aarch64_asm:tst(r7, 64)), - ?_assertAsmEqual(<<16#f27e051f:32/little>>, "tst x8, #0xc", jit_aarch64_asm:tst(r8, 16#c)) + ?_assertAsmEqual(<<16#f27e051f:32/little>>, "tst x8, #0xc", jit_aarch64_asm:tst(r8, 16#c)), + + %% Test tst with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:tst(r0, 16#123456)) ]. 
tst_w_test_() -> @@ -364,7 +491,12 @@ tst_w_test_() -> ?_assertAsmEqual(<<16#720014bf:32/little>>, "tst w5, #63", jit_aarch64_asm:tst_w(r5, 63)), ?_assertAsmEqual(<<16#721b00df:32/little>>, "tst w6, #32", jit_aarch64_asm:tst_w(r6, 32)), ?_assertAsmEqual(<<16#721a00ff:32/little>>, "tst w7, #64", jit_aarch64_asm:tst_w(r7, 64)), - ?_assertAsmEqual(<<16#721e051f:32/little>>, "tst w8, #0xc", jit_aarch64_asm:tst_w(r8, 16#c)) + ?_assertAsmEqual( + <<16#721e051f:32/little>>, "tst w8, #0xc", jit_aarch64_asm:tst_w(r8, 16#c) + ), + + %% Test tst_w with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:tst_w(r0, 16#123456)) ]. bcc_test_() -> @@ -385,7 +517,8 @@ bcc_test_() -> ?_assertAsmEqual(<<16#5400040c:32/little>>, "b.gt 128", jit_aarch64_asm:bcc(gt, 128)), ?_assertAsmEqual(<<16#5400040d:32/little>>, "b.le 128", jit_aarch64_asm:bcc(le, 128)), ?_assertAsmEqual(<<16#5400040e:32/little>>, "b.al 128", jit_aarch64_asm:bcc(al, 128)), - ?_assertAsmEqual(<<16#5400040f:32/little>>, "b.nv 128", jit_aarch64_asm:bcc(nv, 128)) + ?_assertAsmEqual(<<16#5400040f:32/little>>, "b.nv 128", jit_aarch64_asm:bcc(nv, 128)), + ?_assertAsmEqual(<<16#54000007:32/little>>, "b.vc .+0", jit_aarch64_asm:bcc(vc, 0)) ]. cbnz_test_() -> @@ -504,3 +637,11 @@ adr_test_() -> %% ADR with offset not a multiple of 4 is valid ?_assertAsmEqual(<<16#70000000:32/little>>, "adr x0, .+3", jit_aarch64_asm:adr(r0, 3)) ]. + +%% Test nop instruction +nop_test_() -> + [ + ?_assertAsmEqual( + <<16#d503201f:32/little>>, "nop", jit_aarch64_asm:nop() + ) + ]. 
From feac56c2b7d2c7f81b1dd839507ad53aedad2b50 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 21 Sep 2025 23:52:02 +0200 Subject: [PATCH 46/46] AArch64: fix assembler for some instructions and tests Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64_asm.erl | 7 +- tests/libs/jit/jit_aarch64_asm_tests.erl | 95 +++++++++++++++++------- 2 files changed, 74 insertions(+), 28 deletions(-) diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index c63fe3c93..166221247 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -235,8 +235,9 @@ mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> mov_immediate(Dst, Imm) when Imm >= 0, Imm =< 16#FFFF -> %% Simple 16-bit positive immediate movz(Dst, Imm, 0); -mov_immediate(Dst, Imm) when Imm < 0, Imm >= -16#FFFF -> +mov_immediate(Dst, Imm) when Imm < 0, (-Imm - 1) =< 16#FFFF -> %% Simple 16-bit negative immediate using MOVN + %% MOVN encodes ~immediate, so we can use it when ~Imm fits in 16 bits DstNum = reg_to_num(Dst), <<(16#92800000 bor (((-Imm - 1) band 16#FFFF) bsl 5) bor DstNum):32/little>>; mov_immediate(Dst, Imm) when Imm >= 0 -> @@ -372,7 +373,7 @@ orr_immediate(Dst, N, Immr, Imms) when % 64-bit operation Sf = 1, << - ((Sf bsl 31) bor (16#32000000) bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor + ((Sf bsl 31) bor (16#B2000000) bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (31 bsl 5) bor DstNum):32/little >>. @@ -796,7 +797,7 @@ cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> %% For large immediates, load into a temporary register and compare %% Use r16 as temporary register (caller-saved) TempReg = r16, - LoadInstr = build_positive_immediate(TempReg, <>), + LoadInstr = mov_immediate(TempReg, Imm), CmpInstr = cmp(Rn, TempReg), <>. 
diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index 32b23891a..7e43ddb65 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -24,9 +24,29 @@ -include_lib("eunit/include/eunit.hrl"). -endif. +-export([ + list_to_integer/1, + list_to_integer/2 +]). + +list_to_integer(X) -> erlang:list_to_integer(X). +list_to_integer(X, B) -> erlang:list_to_integer(X, B). + -define(_assertAsmEqual(Bin, Str, Value), ?_assertEqual(jit_tests_common:asm(aarch64, Bin, Str), Value) ). +-define(_assertAsmEqualLargeInt(Bin, Str, Value), + ?_test(begin + case erlang:system_info(machine) of + "BEAM" -> + ?assertEqual(jit_tests_common:asm(aarch64, Bin, Str), Value); + "ATOM" -> + % AtomVM doesn't handle large integers yet. + % Skip the test + ok + end + end) +). add_test_() -> [ @@ -236,14 +256,14 @@ mov_test_() -> ), % mov immediate - very large value requiring multiple instructions - ?_assertAsmEqual( + ?_assertAsmEqualLargeInt( <<16#D29579A1:32/little, 16#F2B7C041:32/little, 16#F2DFD741:32/little, 16#F2EFF941:32/little>>, "mov x1, #0xabcd\n" "movk x1, #0xbe02, lsl #16\n" "movk x1, #0xfeba, lsl #32\n" "movk x1, #0x7fca, lsl #48", - jit_aarch64_asm:mov(r1, 9208452466117618637) + jit_aarch64_asm:mov(r1, ?MODULE:list_to_integer("9208452466117618637")) ), % mov register @@ -256,20 +276,28 @@ mov_test_() -> ), %% Test 4-bit pattern encoding - ?_test(begin - Result = jit_aarch64_asm:mov(r0, 16#FFFFFFFFFFFF0000), - ?assert(is_binary(Result)) - end), + ?_assertAsmEqual( + <<16#929fffe0:32/little>>, + "mov x0, #-65536", + jit_aarch64_asm:mov(r0, -65536) + ), %% Test complex immediate that will use fallback sequence - ?_test(begin - % This should be a complex immediate that can't be encoded as bitmask - % and needs fallback to build_immediate_sequence - Result = jit_aarch64_asm:mov(r0, 16#123456789ABCDEF0), - ?assert(is_binary(Result)) - end), + ?_assertAsmEqualLargeInt( + << + 16#d29bde00:32/little, + 
16#f2b35780:32/little, + 16#f2cacf00:32/little, + 16#f2e24680:32/little + >>, + "mov x0, #0xdef0\n" + "movk x0, #0x9abc, lsl #16\n" + "movk x0, #0x5678, lsl #32\n" + "movk x0, #0x1234, lsl #48", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("123456789ABCDEF0", 16)) + ), %% Test negative immediate that uses build_negative_immediate fallback - ?_assertAsmEqual( + ?_assertAsmEqualLargeInt( << 16#d2842200:32/little, 16#f2aca860:32/little, @@ -280,27 +308,27 @@ mov_test_() -> "movk x0, #0x6543, lsl #16\n" "movk x0, #0xa987, lsl #32\n" "movk x0, #0xedcb, lsl #48", - jit_aarch64_asm:mov(r0, -16#123456789ABCDEF0) + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("-123456789ABCDEF0", 16)) ), %% Test bitmask patterns with different sizes %% Size 16 pattern: repeats every 16 bits - ?_assertAsmEqual( + ?_assertAsmEqualLargeInt( <<16#b20083e0:32/little>>, "mov x0, #0x0001000100010001", - jit_aarch64_asm:mov(r0, 16#0001000100010001) + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("0001000100010001", 16)) ), %% Size 4 pattern: repeats every 4 bits - ?_assertAsmEqual( + ?_assertAsmEqualLargeInt( <<16#b200e7e0:32/little>>, "mov x0, #0x3333333333333333", - jit_aarch64_asm:mov(r0, 16#3333333333333333) + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("3333333333333333", 16)) ), %% Size 2 pattern: repeats every 2 bits - ?_assertAsmEqual( + ?_assertAsmEqualLargeInt( <<16#b200f3e0:32/little>>, "mov x0, #0x5555555555555555", - jit_aarch64_asm:mov(r0, 16#5555555555555555) + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("5555555555555555", 16)) ) ]. 
@@ -380,11 +408,28 @@ cmp_test_() -> ?_assertAsmEqual(<<16#F103001F:32/little>>, "cmp x0, #192", jit_aarch64_asm:cmp(r0, 192)), %% Test large immediate compare (uses temporary register) - ?_test(begin - Result = jit_aarch64_asm:cmp(r0, 16#12345678), - ?assert(is_binary(Result)), - ?assert(byte_size(Result) > 4) - end) + ?_assertAsmEqual( + << + 16#d28acf10:32/little, + 16#f2a24690:32/little, + 16#eb10001f:32/little + >>, + "mov x16, #0x5678\n" + "movk x16, #0x1234, lsl #16\n" + "cmp x0, x16", + jit_aarch64_asm:cmp(r0, 16#12345678) + ), + + %% Test negative immediate compare (uses MOVN) + ?_assertAsmEqual( + << + 16#92800010:32/little, + 16#eb1000ff:32/little + >>, + "movn x16, #0\n" + "cmp x7, x16", + jit_aarch64_asm:cmp(r7, -1) + ) ]. cmp_w_test_() ->