Skip to content

Commit

Permalink
AArch64: Optimize select_val
Browse files Browse the repository at this point in the history
  • Loading branch information
bjorng committed Jul 22, 2023
1 parent c89ae15 commit 9105de4
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 47 deletions.
7 changes: 4 additions & 3 deletions erts/emulator/beam/jit/arm/beam_asm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1232,9 +1232,10 @@ class BeamModuleAssembler : public BeamAssembler,
Label fail,
const Span<ArgVal> &args);

bool emit_optimized_three_way_select(arm::Gp reg,
Label fail,
const Span<ArgVal> &args);
void emit_optimized_two_way_select(arm::Gp reg,
const ArgVal &value1,
const ArgVal &value2,
const ArgVal &label);

#ifdef DEBUG
void emit_tuple_assertion(const ArgSource &Src, arm::Gp tuple_reg);
Expand Down
103 changes: 59 additions & 44 deletions erts/emulator/beam/jit/arm/instr_select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,19 @@ void BeamModuleAssembler::emit_linear_search(arm::Gp comparand,
check_pending_stubs();
}

cmp_arg(comparand, value);
a.b_eq(resolve_beam_label(label, disp1MB));
if (i < count - 1 && label == args[i + count + 1]) {
emit_optimized_two_way_select(comparand,
value,
args[i + 1],
label);
i++;
if ((i % 128) == 0) {
check_pending_stubs();
}
} else {
cmp_arg(comparand, value);
a.b_eq(resolve_beam_label(label, disp1MB));
}
}

/* An invalid label means fallthrough to the next instruction. */
Expand Down Expand Up @@ -283,16 +294,11 @@ void BeamModuleAssembler::emit_i_select_val_lins(const ArgSource &Src,
auto shift = plan.second;

if (base == 0 && shift == 0) {
if (!emit_optimized_three_way_select(src.reg, fail, args)) {
emit_linear_search(src.reg, fail, args);
}
emit_linear_search(src.reg, fail, args);
} else {
auto untagged =
emit_select_untag(Src, args, src.reg, next, base, shift);

if (!emit_optimized_three_way_select(ARG1, fail, untagged)) {
emit_linear_search(ARG1, fail, untagged);
}
emit_linear_search(ARG1, fail, untagged);
}

if (!Fail.isLabel()) {
Expand Down Expand Up @@ -460,51 +466,60 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src,
}

/*
* Attempt to optimize the case when a select_val has exactly two
* values which only differ by one bit and they both branch to the
* same label.
* Optimize the case when a select_val has exactly two values that
* both branch to the same label.
*
* If the values only differ by one bit, the optimization makes use of
* the observation that (V == X || V == Y) is equivalent to (V | (X ^
* Y)) == (X | Y) when (X ^ Y) has only one bit set.
*
* The optimization makes use of the observation that (V == X || V ==
* Y) is equivalent to (V | (X ^ Y)) == (X | Y) when (X ^ Y) has only
* one bit set.
* If more than one bit differs, one conditional branch instruction can
* still be eliminated by using the CCMP instruction.
*
* Return true if the optimization was possible.
*/
bool BeamModuleAssembler::emit_optimized_three_way_select(
void BeamModuleAssembler::emit_optimized_two_way_select(
arm::Gp reg,
Label fail,
const Span<ArgVal> &args) {
if (args.size() != 4 || (args[2] != args[3])) {
return false;
}

uint64_t x = args[0].isImmed() ? args[0].as<ArgImmed>().get()
: args[0].as<ArgWord>().get();
uint64_t y = args[1].isImmed() ? args[1].as<ArgImmed>().get()
: args[1].as<ArgWord>().get();
uint64_t combined = x | y;
const ArgVal &value1,
const ArgVal &value2,
const ArgVal &label) {
uint64_t x = value1.isImmed() ? value1.as<ArgImmed>().get()
: value1.as<ArgWord>().get();
uint64_t y = value2.isImmed() ? value2.as<ArgImmed>().get()
: value2.as<ArgWord>().get();
uint64_t diff = x ^ y;

ArgWord val(combined);

if ((diff & (diff - 1)) != 0) {
return false;
}
/* Be sure to use a register not used by any caller. */
arm::Gp tmp = TMP6;

comment("(Src == 0x%x || Src == 0x%x) <=> (Src | 0x%x) == 0x%x",
x,
y,
diff,
combined);
if ((diff & (diff - 1)) == 0) {
uint64_t combined = x | y;
ArgWord val(combined);

a.orr(TMP1, reg, imm(diff));
cmp_arg(TMP1, val);
a.b_eq(resolve_beam_label(args[2], disp1MB));
comment("(Src == 0x%x || Src == 0x%x) <=> (Src | 0x%x) == 0x%x",
x,
y,
diff,
combined);

/* An invalid label means fallthrough to the next instruction. */
if (fail.isValid()) {
a.b(resolve_label(fail, disp128MB));
a.orr(tmp, reg, imm(diff));
cmp_arg(tmp, val);
} else {
if (x < 32) {
cmp(reg, y);
a.ccmp(reg,
imm(x),
imm(NZCV::kEqual),
imm(arm::CondCode::kNE));
} else {
cmp(reg, x);
a.mov(tmp, y);
a.ccmp(reg,
tmp,
imm(NZCV::kEqual),
imm(arm::CondCode::kNE));
}
}

return true;
a.b_eq(resolve_beam_label(label, disp1MB));
}

0 comments on commit 9105de4

Please sign in to comment.