Skip to content

Commit

Permalink
AMDGPU: Add baseline tests for cmpxchg custom expansion
Browse files Browse the repository at this point in the history
We need a non-atomic path if flat may access private.
  • Loading branch information
arsenm committed Oct 7, 2024
1 parent 3310689 commit 4934c7d
Show file tree
Hide file tree
Showing 5 changed files with 382 additions and 25 deletions.
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5005,7 +5005,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5061,7 +5061,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 9000
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5121,7 +5121,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
Expand Down Expand Up @@ -5184,7 +5184,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5257,7 +5257,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
Expand Down Expand Up @@ -5310,7 +5310,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5365,7 +5365,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in,
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
Expand Down Expand Up @@ -5423,7 +5423,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5491,7 +5491,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
Expand Down Expand Up @@ -5543,7 +5543,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) {
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr %in, i64 4
%val = load atomic double, ptr %gep seq_cst, align 8
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
Expand Down Expand Up @@ -5589,7 +5589,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr %out) {
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
%val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8
%val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
Expand Down Expand Up @@ -5654,7 +5654,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
entry:
%ptr = getelementptr double, ptr %in, i64 %index
%gep = getelementptr double, ptr %ptr, i64 4
%val = load atomic double, ptr %gep seq_cst, align 8
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
Expand Down Expand Up @@ -5714,7 +5714,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index)
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr double, ptr %in, i64 %index
%val = load atomic double, ptr %ptr seq_cst, align 8
%val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
Expand Down Expand Up @@ -5757,7 +5757,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) {
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr %out, i64 4
store atomic double %in, ptr %gep seq_cst, align 8
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5794,7 +5794,7 @@ define amdgpu_kernel void @atomic_store_f64(double %in, ptr %out) {
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
entry:
store atomic double %in, ptr %out seq_cst, align 8
store atomic double %in, ptr %out seq_cst, align 8, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5850,7 +5850,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out,
entry:
%ptr = getelementptr double, ptr %out, i64 %index
%gep = getelementptr double, ptr %ptr, i64 4
store atomic double %in, ptr %gep seq_cst, align 8
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
ret void
}

Expand Down Expand Up @@ -5901,7 +5901,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr double, ptr %out, i64 %index
store atomic double %in, ptr %ptr seq_cst, align 8
store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
ret void
}

Expand Down
12 changes: 7 additions & 5 deletions llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,12 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in,

define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A-LABEL: define void @syncscope_workgroup_nortn(
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
; GFX90A: atomicrmw.check.private:
; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
Expand All @@ -144,16 +144,16 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
;
; GFX1100-LABEL: define void @syncscope_workgroup_nortn(
; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
; GFX1100-NEXT: ret void
;
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !mmra !2, !amdgpu.no.fine.grained.memory !3, !amdgpu.ignore.denormal.mode !3
Expand Down Expand Up @@ -193,8 +193,10 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
; GFX90A: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
; GFX90A: [[META1]] = !{!"foo", !"bar"}
; GFX90A: [[META2]] = !{!"bux", !"baz"}
; GFX90A: [[META3]] = !{}
;.
; GFX1100: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
; GFX1100: [[META1]] = !{!"foo", !"bar"}
; GFX1100: [[META2]] = !{!"bux", !"baz"}
; GFX1100: [[META3]] = !{}
;.
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.global:
; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; GFX908-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.phi:
; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]]
Expand All @@ -188,7 +188,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
Expand Down
Loading

0 comments on commit 4934c7d

Please sign in to comment.