diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll index c0b3adce81342d..f4fe003a34d3fb 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll @@ -5088,7 +5088,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old ; GFX12-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr %out, i64 4 - %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -5145,7 +5145,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol ; GFX12-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr %out, i64 9000 - %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -5206,7 +5206,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6 ; GFX12-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr %out, i64 4 - %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 ret void @@ -5270,7 +5270,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i entry: %ptr = getelementptr i64, ptr %out, i64 %index %gep = getelementptr i64, ptr %ptr, i64 4 - %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -5344,7 +5344,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o entry: %ptr = getelementptr i64, ptr %out, i64 %index %gep = getelementptr i64, ptr %ptr, i64 4 - %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 ret void @@ -5398,7 +5398,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -5454,7 +5454,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 ret void @@ -5513,7 +5513,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind ; GFX12-NEXT: s_endpgm entry: %ptr = getelementptr i64, ptr %out, i64 %index - %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -5582,7 +5582,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6 ; GFX12-NEXT: s_endpgm entry: %ptr = getelementptr i64, ptr %out, i64 %index - %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst + %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 ret void @@ -5634,7 +5634,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) { ; GFX12-NEXT: s_endpgm entry: %gep = getelementptr double, ptr %in, i64 4 - %val = load atomic double, ptr %gep seq_cst, align 8 + %val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0 store double %val, ptr %out ret void } @@ -5680,7 +5680,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8 + %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8, !noalias.addrspace !0 store double %val, ptr %out ret void } @@ -5745,7 +5745,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64 entry: %ptr = getelementptr double, ptr %in, i64 %index %gep = getelementptr double, ptr %ptr, i64 4 - %val = load atomic double, ptr %gep seq_cst, align 8 + %val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0 store double %val, ptr %out ret void } @@ -5805,7 +5805,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index) ; GFX12-NEXT: s_endpgm entry: %ptr = getelementptr double, ptr %in, i64 %index - %val = load atomic double, ptr %ptr seq_cst, align 8 + %val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0 store double %val, ptr %out ret void } @@ -5848,7 +5848,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) { ; GFX12-NEXT: s_endpgm entry: %gep = getelementptr double, ptr %out, i64 4 - store atomic double %in, ptr %gep seq_cst, align 8 + store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0 ret void } @@ -5885,7 +5885,7 @@ define amdgpu_kernel void @atomic_store_f64(double %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - store atomic double %in, ptr %out seq_cst, align 8 + store atomic double %in, ptr %out seq_cst, align 8, !noalias.addrspace !0 ret void } @@ -5941,7 +5941,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out, entry: %ptr = getelementptr double, ptr %out, i64 %index %gep = getelementptr double, ptr %ptr, i64 4 - store atomic double %in, ptr %gep seq_cst, align 8 + store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0 ret void } @@ -5992,7 +5992,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in ; GFX12-NEXT: s_endpgm entry: %ptr = getelementptr double, ptr %out, i64 %index - store atomic double %in, ptr %ptr seq_cst, align 8 + store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0 ret void } diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll index 3c5f3a09082a72..e79bb465563e84 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll @@ -126,12 +126,12 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in, define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX90A-LABEL: define void @syncscope_workgroup_nortn( -; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] { ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: ; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3) -; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]] +; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]] ; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]] ; GFX90A: atomicrmw.check.private: ; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]]) @@ -144,7 +144,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) -; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]] +; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.phi: ; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]] @@ -152,8 +152,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX90A-NEXT: ret void ; ; GFX1100-LABEL: define void @syncscope_workgroup_nortn( -; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]] +; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] { +; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]] ; GFX1100-NEXT: ret void ; %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !mmra !2, !amdgpu.no.fine.grained.memory !3, !amdgpu.ignore.denormal.mode !3 @@ -193,8 +193,10 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) { ; GFX90A: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]} ; GFX90A: [[META1]] = !{!"foo", !"bar"} ; GFX90A: [[META2]] = !{!"bux", !"baz"} +; GFX90A: [[META3]] = !{} ;. ; GFX1100: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]} ; GFX1100: [[META1]] = !{!"foo", !"bar"} ; GFX1100: [[META2]] = !{!"bux", !"baz"} +; GFX1100: [[META3]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll index e8b4e752d3a28c..056eee5b987d65 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll @@ -163,7 +163,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX908-NEXT: br label [[ATOMICRMW_PHI]] ; GFX908: atomicrmw.global: ; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) -; GFX908-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX908-NEXT: br label [[ATOMICRMW_PHI]] ; GFX908: atomicrmw.phi: ; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]] @@ -188,7 +188,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) -; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.phi: ; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]] diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll index cb51bcf9356141..cb2ba0f7eb0b5d 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll @@ -341,7 +341,6 @@ define i64 @test_flat_atomicrmw_and_i64_agent__noalias_addrspace_5__maybe_fine_g ret i64 %res } - define i32 @test_flat_atomicrmw_and_i32_agent__noalias_addrspace_5(ptr %ptr, i32 %value) { ; ALL-LABEL: define i32 @test_flat_atomicrmw_and_i32_agent__noalias_addrspace_5( ; ALL-SAME: ptr [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] { @@ -352,6 +351,132 @@ define i32 @test_flat_atomicrmw_and_i32_agent__noalias_addrspace_5(ptr %ptr, i32 ret i32 %res } +define i64 @test_flat_atomicrmw_and_i64_agent__mmra(ptr %ptr, i64 %value) { +; ALL-LABEL: define i64 @test_flat_atomicrmw_and_i64_agent__mmra( +; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) +; ALL-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] +; ALL: [[ATOMICRMW_PRIVATE]]: +; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; ALL-NEXT: [[LOADED_PRIVATE:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 8 +; ALL-NEXT: [[NEW:%.*]] = and i64 [[LOADED_PRIVATE]], [[VALUE]] +; ALL-NEXT: store i64 [[NEW]], ptr addrspace(5) [[TMP1]], align 8 +; ALL-NEXT: br label %[[ATOMICRMW_PHI:.*]] +; ALL: [[ATOMICRMW_GLOBAL]]: +; ALL-NEXT: [[TMP2:%.*]] = atomicrmw and ptr [[PTR]], i64 [[VALUE]] syncscope("agent") seq_cst, align 8, !mmra [[META2:![0-9]+]], !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; ALL-NEXT: br label %[[ATOMICRMW_PHI]] +; ALL: [[ATOMICRMW_PHI]]: +; ALL-NEXT: [[RES:%.*]] = phi i64 [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ] +; ALL-NEXT: br label %[[ATOMICRMW_END:.*]] +; ALL: [[ATOMICRMW_END]]: +; ALL-NEXT: ret i64 [[RES]] +; + %res = atomicrmw and ptr %ptr, i64 %value syncscope("agent") seq_cst, !mmra !4, !amdgpu.no.fine.grained.memory !0 + ret i64 %res +} + +define i64 @test_flat_atomicrmw_and_i64_agent__noalias_addrspace_5__mmra(ptr %ptr, i64 %value) { +; ALL-LABEL: define i64 @test_flat_atomicrmw_and_i64_agent__noalias_addrspace_5__mmra( +; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[RES:%.*]] = atomicrmw and ptr [[PTR]], i64 [[VALUE]] syncscope("agent") seq_cst, align 8, !mmra [[META2]], !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; ALL-NEXT: ret i64 [[RES]] +; + %res = atomicrmw and ptr %ptr, i64 %value syncscope("agent") seq_cst, !noalias.addrspace !1, !mmra !4, !amdgpu.no.fine.grained.memory !0 + ret i64 %res +} + +; -------------------------------------------------------------------- +; General expansion for subb +; -------------------------------------------------------------------- + +define i64 @test_flat_atomicrmw_sub_i64_agent(ptr %ptr, i64 %value) { +; ALL-LABEL: define i64 @test_flat_atomicrmw_sub_i64_agent( +; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) +; ALL-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] +; ALL: [[ATOMICRMW_PRIVATE]]: +; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; ALL-NEXT: [[LOADED_PRIVATE:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 8 +; ALL-NEXT: [[NEW:%.*]] = sub i64 [[LOADED_PRIVATE]], [[VALUE]] +; ALL-NEXT: store i64 [[NEW]], ptr addrspace(5) [[TMP1]], align 8 +; ALL-NEXT: br label %[[ATOMICRMW_PHI:.*]] +; ALL: [[ATOMICRMW_GLOBAL]]: +; ALL-NEXT: [[TMP2:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; ALL-NEXT: br label %[[ATOMICRMW_PHI]] +; ALL: [[ATOMICRMW_PHI]]: +; ALL-NEXT: [[RES:%.*]] = phi i64 [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ] +; ALL-NEXT: br label %[[ATOMICRMW_END:.*]] +; ALL: [[ATOMICRMW_END]]: +; ALL-NEXT: ret i64 [[RES]] +; + %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i64 %res +} + +define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5(ptr %ptr, i64 %value) { +; ALL-LABEL: define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5( +; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; ALL-NEXT: ret i64 [[RES]] +; + %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 + ret i64 %res +} + +define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__maybe_fine_grained(ptr %ptr, i64 %value) { +; ALL-LABEL: define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__maybe_fine_grained( +; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]] +; ALL-NEXT: ret i64 [[RES]] +; + %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, !noalias.addrspace !1 + ret i64 %res +} + +define i32 @test_flat_atomicrmw_sub_i32_agent__noalias_addrspace_5(ptr %ptr, i32 %value) { +; ALL-LABEL: define i32 @test_flat_atomicrmw_sub_i32_agent__noalias_addrspace_5( +; ALL-SAME: ptr [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i32 [[VALUE]] syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; ALL-NEXT: ret i32 [[RES]] +; + %res = atomicrmw sub ptr %ptr, i32 %value syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 + ret i32 %res +} + +define i64 @test_flat_atomicrmw_sub_i64_agent__mmra(ptr %ptr, i64 %value) { +; ALL-LABEL: define i64 @test_flat_atomicrmw_sub_i64_agent__mmra( +; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) +; ALL-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] +; ALL: [[ATOMICRMW_PRIVATE]]: +; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; ALL-NEXT: [[LOADED_PRIVATE:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 8 +; ALL-NEXT: [[NEW:%.*]] = sub i64 [[LOADED_PRIVATE]], [[VALUE]] +; ALL-NEXT: store i64 [[NEW]], ptr addrspace(5) [[TMP1]], align 8 +; ALL-NEXT: br label %[[ATOMICRMW_PHI:.*]] +; ALL: [[ATOMICRMW_GLOBAL]]: +; ALL-NEXT: [[TMP2:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] syncscope("agent") seq_cst, align 8, !mmra [[META2]], !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; ALL-NEXT: br label %[[ATOMICRMW_PHI]] +; ALL: [[ATOMICRMW_PHI]]: +; ALL-NEXT: [[RES:%.*]] = phi i64 [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ] +; ALL-NEXT: br label %[[ATOMICRMW_END:.*]] +; ALL: [[ATOMICRMW_END]]: +; ALL-NEXT: ret i64 [[RES]] +; + %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, !mmra !4, !amdgpu.no.fine.grained.memory !0 + ret i64 %res +} + +define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__mmra(ptr %ptr, i64 %value) { +; ALL-LABEL: define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__mmra( +; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] { +; ALL-NEXT: [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] syncscope("agent") seq_cst, align 8, !mmra [[META2]], !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; ALL-NEXT: ret i64 [[RES]] +; + %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, !noalias.addrspace !1, !mmra !4, !amdgpu.no.fine.grained.memory !0 + ret i64 %res +} + ; -------------------------------------------------------------------- ; General expansion for fadd ; -------------------------------------------------------------------- @@ -1878,23 +2003,45 @@ define i32 @test_flat_atomicrmw_nand_i32_agent__noalias_addrspace_5(ptr %ptr, i3 !0 = !{} !1 = !{i32 5, i32 6} +!2 = !{!"foo", !"bar"} +!3 = !{!"bux", !"baz"} +!4 = !{!2, !3} +!5 = !{} ;. ; GFX7: [[META0]] = !{i32 5, i32 6} ; GFX7: [[META1]] = !{} +; GFX7: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; GFX7: [[META3]] = !{!"foo", !"bar"} +; GFX7: [[META4]] = !{!"bux", !"baz"} ;. ; GFX900: [[META0]] = !{i32 5, i32 6} ; GFX900: [[META1]] = !{} +; GFX900: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; GFX900: [[META3]] = !{!"foo", !"bar"} +; GFX900: [[META4]] = !{!"bux", !"baz"} ;. ; GFX908: [[META0]] = !{i32 5, i32 6} ; GFX908: [[META1]] = !{} +; GFX908: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; GFX908: [[META3]] = !{!"foo", !"bar"} +; GFX908: [[META4]] = !{!"bux", !"baz"} ;. ; GFX90A: [[META0]] = !{i32 5, i32 6} ; GFX90A: [[META1]] = !{} +; GFX90A: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; GFX90A: [[META3]] = !{!"foo", !"bar"} +; GFX90A: [[META4]] = !{!"bux", !"baz"} ;. ; GFX940: [[META0]] = !{i32 5, i32 6} ; GFX940: [[META1]] = !{} +; GFX940: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; GFX940: [[META3]] = !{!"foo", !"bar"} +; GFX940: [[META4]] = !{!"bux", !"baz"} ;. ; GFX12: [[META0]] = !{i32 5, i32 6} ; GFX12: [[META1]] = !{} +; GFX12: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; GFX12: [[META3]] = !{!"foo", !"bar"} +; GFX12: [[META4]] = !{!"bux", !"baz"} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll new file mode 100644 index 00000000000000..6b3c27be8688c2 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck %s + +define { i16, i1 } @cmpxchg_flat_agent_i16(ptr %ptr, i16 %val, i16 %swap) { +; CHECK-LABEL: define { i16, i1 } @cmpxchg_flat_agent_i16( +; CHECK-SAME: ptr [[PTR:%.*]], i16 [[VAL:%.*]], i16 [[SWAP:%.*]]) { +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[SWAP]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[VAL]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] +; CHECK-NEXT: br label %[[PARTWORD_CMPXCHG_LOOP:.*]] +; CHECK: [[PARTWORD_CMPXCHG_LOOP]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], %[[PARTWORD_CMPXCHG_FAILURE:.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] syncscope("agent") monotonic seq_cst, align 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PARTWORD_CMPXCHG_END:.*]], label %[[PARTWORD_CMPXCHG_FAILURE]] +; CHECK: [[PARTWORD_CMPXCHG_FAILURE]]: +; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label %[[PARTWORD_CMPXCHG_LOOP]], label %[[PARTWORD_CMPXCHG_END]] +; CHECK: [[PARTWORD_CMPXCHG_END]]: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 +; CHECK-NEXT: ret { i16, i1 } [[TMP18]] +; + %result = cmpxchg ptr %ptr, i16 %val, i16 %swap syncscope("agent") monotonic seq_cst + ret { i16, i1 } %result +} + +define { i16, i1 } @cmpxchg_flat_agent_i16_align4(ptr %ptr, i16 %val, i16 %swap) { +; CHECK-LABEL: define { i16, i1 } @cmpxchg_flat_agent_i16_align4( +; CHECK-SAME: ptr [[PTR:%.*]], i16 [[VAL:%.*]], i16 [[SWAP:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[SWAP]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[VAL]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], -65536 +; CHECK-NEXT: br label %[[PARTWORD_CMPXCHG_LOOP:.*]] +; CHECK: [[PARTWORD_CMPXCHG_LOOP]]: +; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[TMP11:%.*]], %[[PARTWORD_CMPXCHG_FAILURE:.*]] ] +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP7]], i32 [[TMP6]] syncscope("agent") monotonic seq_cst, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PARTWORD_CMPXCHG_END:.*]], label %[[PARTWORD_CMPXCHG_FAILURE]] +; CHECK: [[PARTWORD_CMPXCHG_FAILURE]]: +; CHECK-NEXT: [[TMP11]] = and i32 [[TMP9]], -65536 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[PARTWORD_CMPXCHG_LOOP]], label %[[PARTWORD_CMPXCHG_END]] +; CHECK: [[PARTWORD_CMPXCHG_END]]: +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16 +; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 [[TMP10]], 1 +; CHECK-NEXT: ret { i16, i1 } [[TMP14]] +; + %result = cmpxchg ptr %ptr, i16 %val, i16 %swap syncscope("agent") monotonic seq_cst, align 4 + ret { i16, i1 } %result +} + +define { i32, i1 } @cmpxchg_flat_agent_i32(ptr %ptr, i32 %val, i32 %swap) { +; CHECK-LABEL: define { i32, i1 } @cmpxchg_flat_agent_i32( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[VAL:%.*]], i32 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i32 [[VAL]], i32 [[SWAP]] syncscope("agent") monotonic seq_cst, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RESULT]] +; + %result = cmpxchg ptr %ptr, i32 %val, i32 %swap syncscope("agent") monotonic seq_cst + ret { i32, i1 } %result +} + +define { i64, i1 } @cmpxchg_flat_agent_i64(ptr %ptr, i64 %val, i64 %swap) { +; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RESULT]] +; + %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst + ret { i64, i1 } %result +} + +define { i64, i1 } @cmpxchg_flat_agent_i64_volatile(ptr %ptr, i64 %val, i64 %swap) { +; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64_volatile( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg volatile ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RESULT]] +; + %result = cmpxchg volatile ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst + ret { i64, i1 } %result +} + +define { i16, i1 } @cmpxchg_flat_agent_i16__noprivate(ptr %ptr, i16 %val, i16 %swap) { +; CHECK-LABEL: define { i16, i1 } @cmpxchg_flat_agent_i16__noprivate( +; CHECK-SAME: ptr [[PTR:%.*]], i16 [[VAL:%.*]], i16 [[SWAP:%.*]]) { +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[SWAP]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[VAL]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] +; CHECK-NEXT: br label %[[PARTWORD_CMPXCHG_LOOP:.*]] +; CHECK: [[PARTWORD_CMPXCHG_LOOP]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], %[[PARTWORD_CMPXCHG_FAILURE:.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] syncscope("agent") monotonic seq_cst, align 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PARTWORD_CMPXCHG_END:.*]], label %[[PARTWORD_CMPXCHG_FAILURE]] +; CHECK: [[PARTWORD_CMPXCHG_FAILURE]]: +; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label %[[PARTWORD_CMPXCHG_LOOP]], label %[[PARTWORD_CMPXCHG_END]] +; CHECK: [[PARTWORD_CMPXCHG_END]]: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 +; CHECK-NEXT: ret { i16, i1 } [[TMP18]] +; + %result = cmpxchg ptr %ptr, i16 %val, i16 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !0 + ret { i16, i1 } %result +} + +define { i32, i1 } @cmpxchg_flat_agent_i32__noprivate(ptr %ptr, i32 %val, i32 %swap) { +; CHECK-LABEL: define { i32, i1 } @cmpxchg_flat_agent_i32__noprivate( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[VAL:%.*]], i32 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i32 [[VAL]], i32 [[SWAP]] syncscope("agent") monotonic seq_cst, align 4, !noalias.addrspace [[META0:![0-9]+]] +; CHECK-NEXT: ret { i32, i1 } [[RESULT]] +; + %result = cmpxchg ptr %ptr, i32 %val, i32 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !0 + ret { i32, i1 } %result +} + +define { i64, i1 } @cmpxchg_flat_agent_i64__noprivate(ptr %ptr, i64 %val, i64 %swap) { +; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__noprivate( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META0]] +; CHECK-NEXT: ret { i64, i1 } [[RESULT]] +; + %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !0 + ret { i64, i1 } %result +} + +define { i64, i1 } @cmpxchg_flat_agent_i64__nolocal(ptr %ptr, i64 %val, i64 %swap) { +; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__nolocal( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]] +; CHECK-NEXT: ret { i64, i1 } [[RESULT]] +; + %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !1 + ret { i64, i1 } %result +} + +define { i64, i1 } @cmpxchg_flat_agent_i64_mmra(ptr %ptr, i64 %val, i64 %swap) { +; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64_mmra( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !mmra [[META2:![0-9]+]] +; CHECK-NEXT: ret { i64, i1 } [[RESULT]] +; + %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !mmra !4 + ret { i64, i1 } %result +} + +define { i64, i1 } @cmpxchg_flat_agent_i64_mmra_noprivate(ptr %ptr, i64 %val, i64 %swap) { +; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64_mmra_noprivate( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !mmra [[META2]], !noalias.addrspace [[META1]] +; CHECK-NEXT: ret { i64, i1 } [[RESULT]] +; + %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !1, !mmra !4 + ret { i64, i1 } %result +} + +!0 = !{i32 5, i32 6} +!1 = !{i32 3, i32 4} +!2 = !{!"foo", !"bar"} +!3 = !{!"bux", !"baz"} +!4 = !{!2, !3} +!5 = !{} + + +;. +; CHECK: [[META0]] = !{i32 5, i32 6} +; CHECK: [[META1]] = !{i32 3, i32 4} +; CHECK: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK: [[META3]] = !{!"foo", !"bar"} +; CHECK: [[META4]] = !{!"bux", !"baz"} +;.