-
Notifications
You must be signed in to change notification settings - Fork 11.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WebAssembly] Don't fold non-nuw add/sub in FastISel #111278
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,106 @@ | ||||||||||
; RUN: llc < %s -asm-verbose=false -fast-isel -verify-machineinstrs | FileCheck %s | ||||||||||
|
||||||||||
target triple = "wasm32-unknown-unknown" | ||||||||||
|
||||||||||
; FastISel should not fold one of the add/sub operands into a load/store's | ||||||||||
; offset when 'nuw' (no unsigned wrap) is not present, because the address | ||||||||||
; calculation does not wrap. When there is an add/sub and nuw is not present, we | ||||||||||
; bail out of FastISel. | ||||||||||
|
||||||||||
@mylabel = external global ptr | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_add_load: | ||||||||||
; CHECK: local.get 0 | ||||||||||
; CHECK-NEXT: i32.const 2147483644 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.load 0 | ||||||||||
define i32 @dont_fold_non_nuw_add_load(ptr %p) { | ||||||||||
%q = ptrtoint ptr %p to i32 | ||||||||||
%r = add i32 %q, 2147483644 | ||||||||||
%s = inttoptr i32 %r to ptr | ||||||||||
%t = load i32, ptr %s | ||||||||||
ret i32 %t | ||||||||||
} | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_add_store: | ||||||||||
; CHECK: local.get 0 | ||||||||||
; CHECK-NEXT: i32.const 2147483644 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.const 5 | ||||||||||
; CHECK-NEXT: i32.store 0 | ||||||||||
define void @dont_fold_non_nuw_add_store(ptr %p) { | ||||||||||
%q = ptrtoint ptr %p to i32 | ||||||||||
%r = add i32 %q, 2147483644 | ||||||||||
%s = inttoptr i32 %r to ptr | ||||||||||
store i32 5, ptr %s | ||||||||||
ret void | ||||||||||
} | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_add_load_2: | ||||||||||
; CHECK: i32.const mylabel | ||||||||||
; CHECK-NEXT: i32.const -4 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.load 0 | ||||||||||
define i32 @dont_fold_non_nuw_add_load_2() { | ||||||||||
%t = load i32, ptr inttoptr (i32 add (i32 ptrtoint (ptr @mylabel to i32), i32 -4) to ptr), align 4 | ||||||||||
ret i32 %t | ||||||||||
} | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_add_store_2: | ||||||||||
; CHECK: i32.const mylabel | ||||||||||
; CHECK-NEXT: i32.const -4 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.const 5 | ||||||||||
; CHECK-NEXT: i32.store 0 | ||||||||||
define void @dont_fold_non_nuw_add_store_2() { | ||||||||||
store i32 5, ptr inttoptr (i32 add (i32 ptrtoint (ptr @mylabel to i32), i32 -4) to ptr), align 4 | ||||||||||
ret void | ||||||||||
} | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_sub_load: | ||||||||||
; CHECK: local.get 0 | ||||||||||
; CHECK-NEXT: i32.const 2147483644 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.load 0 | ||||||||||
define i32 @dont_fold_non_nuw_sub_load(ptr %p) { | ||||||||||
%q = ptrtoint ptr %p to i32 | ||||||||||
%r = sub i32 %q, -2147483644 | ||||||||||
%s = inttoptr i32 %r to ptr | ||||||||||
%t = load i32, ptr %s | ||||||||||
ret i32 %t | ||||||||||
} | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_sub_store: | ||||||||||
; CHECK: local.get 0 | ||||||||||
; CHECK-NEXT: i32.const 2147483644 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.const 5 | ||||||||||
; CHECK-NEXT: i32.store 0 | ||||||||||
define void @dont_fold_non_nuw_sub_store(ptr %p) { | ||||||||||
%q = ptrtoint ptr %p to i32 | ||||||||||
%r = sub i32 %q, -2147483644 | ||||||||||
%s = inttoptr i32 %r to ptr | ||||||||||
store i32 5, ptr %s | ||||||||||
ret void | ||||||||||
} | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_sub_load_2: | ||||||||||
; CHECK: i32.const mylabel | ||||||||||
; CHECK-NEXT: i32.const -4 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.load 0 | ||||||||||
define i32 @dont_fold_non_nuw_sub_load_2() { | ||||||||||
%t = load i32, ptr inttoptr (i32 sub (i32 ptrtoint (ptr @mylabel to i32), i32 4) to ptr), align 4 | ||||||||||
ret i32 %t | ||||||||||
} | ||||||||||
|
||||||||||
; CHECK-LABEL: dont_fold_non_nuw_sub_store_2: | ||||||||||
; CHECK: i32.const mylabel | ||||||||||
; CHECK-NEXT: i32.const -4 | ||||||||||
; CHECK-NEXT: i32.add | ||||||||||
; CHECK-NEXT: i32.const 5 | ||||||||||
; CHECK-NEXT: i32.store 0 | ||||||||||
define void @dont_fold_non_nuw_sub_store_2() { | ||||||||||
store i32 5, ptr inttoptr (i32 sub (i32 ptrtoint (ptr @mylabel to i32), i32 4) to ptr), align 4 | ||||||||||
ret void | ||||||||||
} | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually these two are correctly compiled even in the current compiler, because in llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp Lines 355 to 358 in 6de5305
But But I added these tests for symmetry and to ensure if these invariants are well maintained in future. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ target triple = "wasm32-unknown-unknown" | |
define i32 @foo() { | ||
%stack_addr = alloca i32 | ||
%stack_i = ptrtoint ptr %stack_addr to i32 | ||
%added = add i32 %stack_i, undef | ||
%added = add nuw i32 %stack_i, undef | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is supposed to be folded. So I added |
||
%added_addr = inttoptr i32 %added to ptr | ||
%ret = load i32, ptr %added_addr | ||
ret i32 %ret | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have any sense of how often this happens, i.e. how often these address-calculation adds lack
nuw
? It would be a bummer if we bailed out of fast-isel for the majority of address-calculations (since bailing out forces the whole block to be re-selected, it could potentially regress compile time noticeably).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would assume this will be frontend-dependent. For C/C++ input, it should be rare because address arithmetic is all
inbounds
GEPs. For the input we have, it will be very common.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made this pattern of code
assert(false)
in FastISel and ran emscriptencore0
tests with it, and the only case this pattern was generated is in asan tests.This part in the normal ISel is hit a lot more because many other instructions, including
getelementptr
s, are turned intoISD:ADD
in ISel. But this is in the normal ISel and there's no "bailout". (What I told you offline was actually hitting this cases in the normal ISel. I addedassert(false)
in both here and the newly added FastISel checks. So this one is not very relevant anyway.)llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
Lines 328 to 332 in 6568827
So I guess, as @SingleAccretion pointed out, we don't need to worry too much about -O0 compilation time at least in C/C++. But this may regress -O0 compilation time in C# (or other languages).
Currently our FastISel's
selectLoad
andselectStore
only handles the cases where its address can be represented within a singleAddress
class:llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
Lines 53 to 110 in db4874c
And the addressing mode is assumed to be either reg-based or frameindex-based. So we can handle cases like reg + label but not two different regs, which we need to handle to fix this bug within FastISel.
While it is not impossible to extend FastISel to handle the two-register cases, I guess that means we can handle a lot more cases that we are currently rejecting, which means even bigger overhaul. It's possible that we only additionally handle the cases in this bug, but we don't really know how frequent these cases are compared to other cases we are already rejecting. Also given that this is a "Fast" ISel after all, I'm not sure whether a bigger extension of the pass is worthwhile.
How about landing this for now, because this is a correctness issue, and see if this is really regressing -O0 compilation times a lot? For that we'd appreciate feedback from C# devs (@SingleAccretion and @yowl).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can get some PIN (binary instrumentation) data on this change with our input - it should not take a lot of time (mainly building clang itself).
However, before I do that, I have a more basic question: why
return false
and notbreak
? I don't think there is a need to support complex addressing modes with two registers, we can presumably just use the non-nuw
add
as the one and only base register (with a zero offset), which, if I am reading the code correctly, is whatbreak
ing will do.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right! I was misunderstanding the "one reg" rule in FastISel. If FastISel cannot figure out how to make use of the offset, it assigns the whole expression to a single register, which then generates more instructions to make it happen (by adding more registers for its child expressions):
llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
Line 380 in c563fe5
Anyway, changed them to
break
s, and renamed the test file and added-fast-isel-abort=1
because we don't bail out of FastISel anymore. Thanks!