Skip to content

Commit

Permalink
Optimize select_val with consecutive identical labels
Browse files Browse the repository at this point in the history
  • Loading branch information
bjorng committed Jul 24, 2023
1 parent c89ae15 commit 23089ff
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 83 deletions.
7 changes: 4 additions & 3 deletions erts/emulator/beam/jit/arm/beam_asm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1232,9 +1232,10 @@ class BeamModuleAssembler : public BeamAssembler,
Label fail,
const Span<ArgVal> &args);

bool emit_optimized_three_way_select(arm::Gp reg,
Label fail,
const Span<ArgVal> &args);
void emit_optimized_two_way_select(arm::Gp reg,
const ArgVal &value1,
const ArgVal &value2,
const ArgVal &label);

#ifdef DEBUG
void emit_tuple_assertion(const ArgSource &Src, arm::Gp tuple_reg);
Expand Down
127 changes: 80 additions & 47 deletions erts/emulator/beam/jit/arm/instr_select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,19 @@ void BeamModuleAssembler::emit_linear_search(arm::Gp comparand,
check_pending_stubs();
}

cmp_arg(comparand, value);
a.b_eq(resolve_beam_label(label, disp1MB));
if (i < count - 1 && label == args[i + count + 1]) {
emit_optimized_two_way_select(comparand,
value,
args[i + 1],
label);
i++;
if ((i % 128) == 0) {
check_pending_stubs();
}
} else {
cmp_arg(comparand, value);
a.b_eq(resolve_beam_label(label, disp1MB));
}
}

/* An invalid label means fallthrough to the next instruction. */
Expand Down Expand Up @@ -283,16 +294,11 @@ void BeamModuleAssembler::emit_i_select_val_lins(const ArgSource &Src,
auto shift = plan.second;

if (base == 0 && shift == 0) {
if (!emit_optimized_three_way_select(src.reg, fail, args)) {
emit_linear_search(src.reg, fail, args);
}
emit_linear_search(src.reg, fail, args);
} else {
auto untagged =
emit_select_untag(Src, args, src.reg, next, base, shift);

if (!emit_optimized_three_way_select(ARG1, fail, untagged)) {
emit_linear_search(ARG1, fail, untagged);
}
emit_linear_search(ARG1, fail, untagged);
}

if (!Fail.isLabel()) {
Expand Down Expand Up @@ -460,51 +466,78 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src,
}

/*
* Attempt to optimize the case when a select_val has exactly two
* values which only differ by one bit and they both branch to the
* same label.
* Optimize the case when a select_val has exactly two values that
* both branch to the same label.
*
* The optimization makes use of the observation that (V == X || V ==
* Y) is equivalent to (V | (X ^ Y)) == (X | Y) when (X ^ Y) has only
* one bit set.
* If the values only differ by one bit, the optimization makes use of
* the observation that (V == X || V == Y) is equivalent to (V | (X ^
* Y)) == (X | Y) when (X ^ Y) has only one bit set.
*
* If more than one bit differs, one conditional branch instruction can
* still be eliminated by using the CCMP instruction.
*
* Return true if the optimization was possible.
*/
/* NOTE(review): this span appears to be rendered diff text in which the
 * removed emit_optimized_three_way_select lines and the added
 * emit_optimized_two_way_select lines are interleaved without +/-
 * markers. The comments added below annotate the two-way variant, which
 * emits code that branches to `label` when `reg` equals either value1
 * or value2 and falls through otherwise. */
bool BeamModuleAssembler::emit_optimized_three_way_select(
void BeamModuleAssembler::emit_optimized_two_way_select(
arm::Gp reg,
Label fail,
const Span<ArgVal> &args) {
if (args.size() != 4 || (args[2] != args[3])) {
return false;
}

uint64_t x = args[0].isImmed() ? args[0].as<ArgImmed>().get()
: args[0].as<ArgWord>().get();
uint64_t y = args[1].isImmed() ? args[1].as<ArgImmed>().get()
: args[1].as<ArgWord>().get();
uint64_t combined = x | y;
const ArgVal &value1,
const ArgVal &value2,
const ArgVal &label) {
/* Fetch the raw 64-bit words of the two values to compare against;
 * each value is read either as an ArgImmed or as an ArgWord. */
uint64_t x = value1.isImmed() ? value1.as<ArgImmed>().get()
: value1.as<ArgWord>().get();
uint64_t y = value2.isImmed() ? value2.as<ArgImmed>().get()
: value2.as<ArgWord>().get();
/* diff has a bit set in every position where x and y differ. */
uint64_t diff = x ^ y;

ArgWord val(combined);

if ((diff & (diff - 1)) != 0) {
return false;
}

comment("(Src == 0x%x || Src == 0x%x) <=> (Src | 0x%x) == 0x%x",
x,
y,
diff,
combined);

a.orr(TMP1, reg, imm(diff));
cmp_arg(TMP1, val);
a.b_eq(resolve_beam_label(args[2], disp1MB));
/* Be sure to use a register not used by any caller. */
arm::Gp tmp = TMP6;

/* An invalid label means fallthrough to the next instruction. */
if (fail.isValid()) {
a.b(resolve_label(fail, disp128MB));
/* Case 1: y directly follows x. Both equality tests collapse into a
 * single unsigned range check, (reg - x) < 2, followed by a
 * branch-if-lower (b_lo). */
if (x + 1 == y) {
/* NOTE(review): x and y are uint64_t but are formatted with %ld here
 * (and with %x further down) — confirm that comment() tolerates
 * this. */
comment("(Src == %ld || Src == %ld) <=> (Src - %ld) < 2",
x,
y,
x);
if (x == 0) {
/* Nothing to subtract when x is zero; compare reg directly. */
a.cmp(reg, imm(2));
} else {
sub(tmp, reg, x);
a.cmp(tmp, imm(2));
}
a.b_lo(resolve_beam_label(label, disp1MB));
/* Case 2: diff has at most one bit set, i.e. x and y differ in a single
 * bit position. Forcing that bit on in reg and comparing with (x | y)
 * tests both values with one compare. */
} else if ((diff & (diff - 1)) == 0) {
uint64_t combined = x | y;
ArgWord val(combined);

comment("(Src == 0x%x || Src == 0x%x) <=> (Src | 0x%x) == 0x%x",
x,
y,
diff,
combined);

a.orr(tmp, reg, imm(diff));
cmp_arg(tmp, val);
a.b_eq(resolve_beam_label(label, disp1MB));
} else {
/* Case 3 (general): the first compare sets the flags. The
 * conditional compare that follows performs the second comparison
 * only when the first one was "not equal" (kNE); otherwise it sets
 * the flags to NZCV::kEqual. The final b_eq is therefore taken when
 * reg matches either value, using a single conditional branch. */
if (x < 32) {
/* x is small enough to be used as the immediate operand of ccmp
 * (presumably limited to 5 unsigned bits — confirm against the
 * A64 reference), so y is tested first and x second. */
cmp(reg, y);
a.ccmp(reg,
imm(x),
imm(NZCV::kEqual),
imm(arm::CondCode::kNE));
} else if (-y < 32) {
/* -y is evaluated in unsigned arithmetic, so this holds when y is
 * zero or the two's complement encoding of a value in [-31, -1].
 * ccmn compares reg against the negated immediate, which makes
 * the second test equivalent to reg == y. */
cmp(reg, x);
a.ccmn(reg,
imm(-y),
imm(NZCV::kEqual),
imm(arm::CondCode::kNE));
} else {
/* Neither value fits the immediate form; materialize y in the
 * scratch register and use the register form of ccmp. */
cmp(reg, x);
a.mov(tmp, y);
a.ccmp(reg,
tmp,
imm(NZCV::kEqual),
imm(arm::CondCode::kNE));
}
a.b_eq(resolve_beam_label(label, disp1MB));
}

return true;
}
6 changes: 4 additions & 2 deletions erts/emulator/beam/jit/x86/beam_asm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,8 +1336,10 @@ class BeamModuleAssembler : public BeamAssembler,
const ArgVal &Fail,
const Span<ArgVal> &args);

bool emit_optimized_three_way_select(const ArgVal &Fail,
const Span<ArgVal> &args);
bool emit_optimized_two_way_select(bool destructive,
const ArgVal &value1,
const ArgVal &value2,
const ArgVal &label);

#ifdef DEBUG
void emit_tuple_assertion(const ArgSource &Src, x86::Gp tuple_reg);
Expand Down
59 changes: 28 additions & 31 deletions erts/emulator/beam/jit/x86/instr_select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ void BeamModuleAssembler::emit_linear_search(x86::Gp comparand,
const ArgImmed &value = args[i];
const ArgLabel &label = args[i + count];

if (i < count - 1 && label == args[i + count + 1]) {
if (emit_optimized_two_way_select(i == count - 2,
value,
args[i + 1],
label)) {
i++;
continue;
}
}

cmp_arg(comparand, value, ARG1);
a.je(resolve_beam_label(label));
}
Expand Down Expand Up @@ -86,10 +96,6 @@ void BeamModuleAssembler::emit_i_select_val_lins(const ArgSource &Src,

mov_arg(ARG2, Src);

if (emit_optimized_three_way_select(Fail, args)) {
return;
}

emit_linear_search(ARG2, Fail, args);
}

Expand Down Expand Up @@ -157,9 +163,7 @@ void BeamModuleAssembler::emit_binsearch_nodes(size_t Left,
args.begin() + Left + count,
args.begin() + count + Left + remaining);

if (!emit_optimized_three_way_select(Fail, shrunk)) {
emit_linear_search(ARG2, Fail, shrunk);
}
emit_linear_search(ARG2, Fail, shrunk);

return;
}
Expand Down Expand Up @@ -249,18 +253,18 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src,
* one bit set.
*
* ARG2 contains the value.
* Return true if the optimization was possible, in
* which case ARG1 should be considered trashed.
*
* Return true if the optimization was possible, in which case ARG1
* and ARG3 should be considered trashed. If the destructive argument
* is true, ARG2 will also be trashed.
*/
bool BeamModuleAssembler::emit_optimized_three_way_select(
const ArgVal &Fail,
const Span<ArgVal> &args) {
if (args.size() != 4 || (args[2] != args[3])) {
return false;
}

uint64_t x = args[0].as<ArgImmed>().get();
uint64_t y = args[1].as<ArgImmed>().get();
bool BeamModuleAssembler::emit_optimized_two_way_select(
bool destructive,
const ArgVal &value1,
const ArgVal &value2,
const ArgVal &label) {
uint64_t x = value1.as<ArgImmed>().get();
uint64_t y = value2.as<ArgImmed>().get();
uint64_t combined = x | y;
uint64_t diff = x ^ y;
ArgVal val(ArgVal::Immediate, combined);
Expand All @@ -273,22 +277,15 @@ bool BeamModuleAssembler::emit_optimized_three_way_select(
diff,
combined);

if (Support::isInt32((Sint)diff)) {
if (destructive && Support::isInt32((Sint)diff)) {
a.or_(ARG2, imm(diff));
cmp_arg(ARG2, val, ARG3);
} else {
a.mov(ARG1, imm(diff));
a.or_(ARG2, ARG1);
}

cmp_arg(ARG2, val, ARG1);
a.je(resolve_beam_label(args[2]));

if (Fail.isLabel()) {
a.jmp(resolve_beam_label(Fail));
} else {
/* NIL means fallthrough to the next instruction. */
ASSERT(Fail.isNil());
mov_imm(ARG1, diff);
a.or_(ARG1, ARG2);
cmp_arg(ARG1, val, ARG3);
}
a.je(resolve_beam_label(label));

return true;
}

0 comments on commit 23089ff

Please sign in to comment.