-
Notifications
You must be signed in to change notification settings - Fork 11.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[RFC] IR: Define noalias.addrspace metadata
This is intended to solve a problem with lowering atomics in OpenMP and C++ that is common to AMDGPU and NVPTX. In OpenCL and CUDA, it is undefined behavior for an atomic instruction to modify an object in thread-private memory. In OpenMP, it is defined. Correspondingly, the hardware does not handle this case correctly: for AMDGPU, 32-bit atomics work and 64-bit atomics are silently dropped. We therefore need to codegen this by inserting a runtime address space check, performing the private case without atomics, and falling back to issuing the real atomic otherwise. This metadata allows us to avoid the extra check and branch. Handle this by introducing metadata, intended to be applied to atomicrmw, indicating that it cannot access the forbidden address space.
- Loading branch information
Showing
6 changed files
with
237 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
; RUN: llvm-as < %s | llvm-dis | FileCheck %s | ||
|
||
; Round-trip case: atomicrmw add carrying !noalias.addrspace !0, whose operands are the single pair {i32 0, i32 1}. | ||
; The patterns below expect the attachment to be printed again after the file is assembled and disassembled. | ||
define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) { | ||
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1( | ||
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { | ||
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]] | ||
; CHECK-NEXT: ret i64 [[RET]] | ||
; | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0 | ||
ret i64 %ret | ||
} | ||
|
||
; Round-trip case: atomicrmw add carrying !noalias.addrspace !1, whose operands are the single pair {i32 0, i32 2}. | ||
; The patterns below bind the printed metadata id to META1 for the node comparison at the end of the file. | ||
define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) { | ||
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2( | ||
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { | ||
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]] | ||
; CHECK-NEXT: ret i64 [[RET]] | ||
; | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1 | ||
ret i64 %ret | ||
} | ||
|
||
; Round-trip case: atomicrmw add carrying !noalias.addrspace !2, whose operands are the single pair {i32 1, i32 3}, | ||
; i.e. a pair whose first operand is not zero. | ||
define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) { | ||
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3( | ||
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { | ||
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]] | ||
; CHECK-NEXT: ret i64 [[RET]] | ||
; | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2 | ||
ret i64 %ret | ||
} | ||
|
||
; Round-trip case: atomicrmw add carrying !noalias.addrspace !3, a node with four i32 operands | ||
; (4, 6, 10, 55), i.e. two pairs in a single attachment. | ||
define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) { | ||
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges( | ||
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { | ||
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]] | ||
; CHECK-NEXT: ret i64 [[RET]] | ||
; | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3 | ||
ret i64 %ret | ||
} | ||
|
||
; Round-trip case on a non-atomic load: the i64 load carries !noalias.addrspace !4 ({i32 5, i32 6}). | ||
; This is the first use of !4 in the file, so the patterns below bind its printed id to META4. | ||
define i64 @load_noalias_addrspace__5_6(ptr %ptr) { | ||
; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6( | ||
; CHECK-SAME: ptr [[PTR:%.*]]) { | ||
; CHECK-NEXT: [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]] | ||
; CHECK-NEXT: ret i64 [[RET]] | ||
; | ||
%ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4 | ||
ret i64 %ret | ||
} | ||
|
||
; Round-trip case on a non-atomic store carrying !noalias.addrspace !4 ({i32 5, i32 6}). | ||
; The pattern reuses the already-bound META4 instead of capturing a new id, so the store must print the same node as the load case. | ||
define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) { | ||
; CHECK-LABEL: define void @store_noalias_addrspace__5_6( | ||
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { | ||
; CHECK-NEXT: store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]] | ||
; CHECK-NEXT: ret void | ||
; | ||
store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4 | ||
ret void | ||
} | ||
|
||
; Round-trip case on cmpxchg (monotonic success and failure orderings) carrying !noalias.addrspace !4 ({i32 5, i32 6}). | ||
; The pattern reuses the already-bound META4, so the same node id must be printed here. | ||
define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) { | ||
; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6( | ||
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) { | ||
; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]] | ||
; CHECK-NEXT: ret { i64, i1 } [[RET]] | ||
; | ||
%ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4 | ||
ret { i64, i1 } %ret | ||
} | ||
|
||
; External callee with no arguments; it is the call target in @call_noalias_addrspace__5_6. | ||
declare void @foo() | ||
|
||
; Round-trip case on an ordinary call: the call to @foo carries !noalias.addrspace !4 ({i32 5, i32 6}). | ||
; Note that the %ptr parameter is not used by the body; the call itself takes no arguments. | ||
define void @call_noalias_addrspace__5_6(ptr %ptr) { | ||
; CHECK-LABEL: define void @call_noalias_addrspace__5_6( | ||
; CHECK-SAME: ptr [[PTR:%.*]]) { | ||
; CHECK-NEXT: call void @foo(), !noalias.addrspace [[META4]] | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @foo(), !noalias.addrspace !4 | ||
ret void | ||
} | ||
|
||
; Round-trip case on an intrinsic call: the llvm.memcpy call (i1 false as its last argument) carries | ||
; !noalias.addrspace !4 ({i32 5, i32 6}) and must print the same META4 node as the earlier cases. | ||
define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) { | ||
; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6( | ||
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) { | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]] | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4 | ||
ret void | ||
} | ||
|
||
; Declaration of the memcpy intrinsic called by @call_memcpy_intrinsic_addrspace__5_6; it uses attribute group #0 defined in this file. | ||
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 | ||
|
||
; Attribute group referenced by the @llvm.memcpy.p0.p0.i64 declaration in this file. | ||
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } | ||
|
||
; Metadata nodes attached via !noalias.addrspace by the functions in this file. Every node is a flat list of i32 | ||
; operands taken in pairs; !3 holds two pairs, the others one. The patterns after the nodes expect each node to be | ||
; printed with the same operands it was written with. | ||
!0 = !{i32 0, i32 1} | ||
!1 = !{i32 0, i32 2} | ||
!2 = !{i32 1, i32 3} | ||
!3 = !{i32 4, i32 6, i32 10, i32 55} | ||
!4 = !{i32 5, i32 6} | ||
;. | ||
; CHECK: [[META0]] = !{i32 0, i32 1} | ||
; CHECK: [[META1]] = !{i32 0, i32 2} | ||
; CHECK: [[META2]] = !{i32 1, i32 3} | ||
; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55} | ||
; CHECK: [[META4]] = !{i32 5, i32 6} | ||
;. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s | ||
|
||
; Negative case: !noalias.addrspace !0 is an empty node (!{}). The assembler run is expected to fail and to | ||
; print the diagnostic below followed by the offending node. | ||
; CHECK: It should have at least one range! | ||
; CHECK-NEXT: !0 = !{} | ||
define i64 @noalias_addrspace__empty(ptr %ptr, i64 %val) { | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !0 | ||
ret i64 %ret | ||
} | ||
|
||
; Negative case: !noalias.addrspace !1 has a single operand (!{i32 0}), so its only pair is incomplete. | ||
; The diagnostic below is expected, followed by the offending node. | ||
; CHECK: Unfinished range! | ||
; CHECK-NEXT: !1 = !{i32 0} | ||
define i64 @noalias_addrspace__single_field(ptr %ptr, i64 %val) { | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1 | ||
ret i64 %ret | ||
} | ||
|
||
; Negative case: !noalias.addrspace !2 is the pair {i32 0, i32 0}, with both operands equal. | ||
; The diagnostic below is expected, followed by the offending node. | ||
; CHECK: Range must not be empty! | ||
; CHECK-NEXT: !2 = !{i32 0, i32 0} | ||
define i64 @noalias_addrspace__0_0(ptr %ptr, i64 %val) { | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !2 | ||
ret i64 %ret | ||
} | ||
|
||
; Negative case: !noalias.addrspace !3 uses i64 operands ({i64 1, i64 5}) instead of i32. | ||
; Here the line expected after the diagnostic is the instruction rather than the metadata node; the expected | ||
; text includes "align 8" even though the instruction below is written without an explicit alignment. | ||
; CHECK: noalias.addrspace type must be i32! | ||
; CHECK-NEXT: %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3 | ||
define i64 @noalias_addrspace__i64(ptr %ptr, i64 %val) { | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !3 | ||
ret i64 %ret | ||
} | ||
|
||
; Negative case: !noalias.addrspace !4 uses float operands ({float 0.0, float 2.0}). | ||
; Only the diagnostic text is matched; no follow-up pattern pins the line printed after it. | ||
; CHECK: The lower limit must be an integer! | ||
define i64 @noalias_addrspace__fp(ptr %ptr, i64 %val) { | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !4 | ||
ret i64 %ret | ||
} | ||
|
||
; Negative case: !noalias.addrspace !5 uses pointer operands ({ptr null, ptr addrspace(1) null}). | ||
; Only the diagnostic text is matched; no follow-up pattern pins the line printed after it. | ||
; CHECK: The lower limit must be an integer! | ||
define i64 @noalias_addrspace__ptr(ptr %ptr, i64 %val) { | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !5 | ||
ret i64 %ret | ||
} | ||
|
||
; Negative case: !noalias.addrspace !6 uses i32 ptrtoint expressions on the globals @gv0 and @gv1 as operands | ||
; rather than literal integers. Only the diagnostic text is matched. | ||
; CHECK: The lower limit must be an integer! | ||
define i64 @noalias_addrspace__nonconstant(ptr %ptr, i64 %val) { | ||
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !6 | ||
ret i64 %ret | ||
} | ||
|
||
; Globals whose addresses are the ptrtoint operands of metadata node !6 in this file. | ||
@gv0 = global i32 0 | ||
@gv1 = global i32 1 | ||
|
||
; Malformed !noalias.addrspace operands, one node per negative case in this file: | ||
; !0 no operands; !1 a single operand; !2 a pair with equal operands; !3 i64 operands; | ||
; !4 float operands; !5 pointer operands; !6 ptrtoint expressions on @gv0 and @gv1. | ||
!0 = !{} | ||
!1 = !{i32 0} | ||
!2 = !{i32 0, i32 0} | ||
!3 = !{i64 1, i64 5} | ||
!4 = !{float 0.0, float 2.0} | ||
!5 = !{ptr null, ptr addrspace(1) null} | ||
!6 = !{i32 ptrtoint (ptr @gv0 to i32), i32 ptrtoint (ptr @gv1 to i32) } | ||
|
||
|