-
Notifications
You must be signed in to change notification settings - Fork 11.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw #102462
base: main
Are you sure you want to change the base?
clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw #102462
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) ChangesFull diff: https://github.com/llvm/llvm-project/pull/102462.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 91f1c2f2e6239e..9b7f1bddc0d1b3 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -647,6 +647,14 @@ class LangOptions : public LangOptionsBase {
return ConvergentFunctions;
}
+ /// Return true if atomicrmw operations targeting allocations in private
+ /// memory are undefined.
+ bool threadPrivateMemoryAtomicsAreUndefined() const {
+ // Should be false for OpenMP.
+ // TODO: Should this be true for SYCL?
+ return OpenCL || CUDA;
+ }
+
/// Return the OpenCL C or C++ version as a VersionTuple.
VersionTuple getOpenCLVersionTuple() const;
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 37e6af3d4196a8..c5797a405f7b30 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
using namespace clang;
using namespace clang::CodeGen;
@@ -550,6 +551,16 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
+
+ if (RMW.getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS &&
+ CGF.CGM.getLangOpts().threadPrivateMemoryAtomicsAreUndefined()) {
+ llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+ llvm::MDNode *ASRange = MDHelper.createRange(
+ llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
+ llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
+ RMW.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
+ }
+
if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
return;
diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 8bf8241e343e70..a5e9bd6df07143 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -22,19 +22,19 @@
__global__ void ffp1(float *p) {
// CHECK-LABEL: @_Z4ffp1Pf
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE:[0-9]+]]{{$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE:[0-9]+]], !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFE: _Z4ffp1Pf
// SAFE: global_atomic_cmpswap
@@ -62,19 +62,19 @@ __global__ void ffp1(float *p) {
__global__ void ffp2(double *p) {
// CHECK-LABEL: @_Z4ffp2Pd
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFE-LABEL: @_Z4ffp2Pd
// SAFE: global_atomic_cmpswap_b64
@@ -102,19 +102,19 @@ __global__ void ffp2(double *p) {
// long double is the same as double for amdgcn.
__global__ void ffp3(long double *p) {
// CHECK-LABEL: @_Z4ffp3Pe
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFE-LABEL: @_Z4ffp3Pe
// SAFE: global_atomic_cmpswap_b64
@@ -139,34 +139,34 @@ __global__ void ffp3(long double *p) {
__device__ double ffp4(double *p, float f) {
// CHECK-LABEL: @_Z4ffp4Pdf
// CHECK: fpext float {{.*}} to double
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
return __atomic_fetch_sub(p, f, memory_order_relaxed);
}
__device__ double ffp5(double *p, int i) {
// CHECK-LABEL: @_Z4ffp5Pdi
// CHECK: sitofp i32 {{.*}} to double
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
return __atomic_fetch_sub(p, i, memory_order_relaxed);
}
__global__ void ffp6(_Float16 *p) {
// CHECK-LABEL: @_Z4ffp6PDF16
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFE: _Z4ffp6PDF16
// SAFE: global_atomic_cmpswap
@@ -190,3 +190,6 @@ __global__ void ffp6(_Float16 *p) {
__hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
__hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
}
+
+// SAFEIR: ![[NO_PRIVATE]] = !{i32 5, i32 6}
+// UNSAFEIR: ![[NO_PRIVATE]] = !{i32 5, i32 6}
diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index 5e2de38ac3d3e3..137d78e32b1da8 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -70,19 +70,19 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *
void fi3(atomic_int *i, atomic_uint *ui) {
// CHECK-LABEL: @fi3
- // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}
@@ -186,19 +186,19 @@ void ff2(atomic_float *d) {
float ff3(atomic_float *d) {
// CHECK-LABEL: @ff3
- // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}
float ff4(global atomic_float *d, float a) {
// CHECK-LABEL: @ff4
- // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
+ // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}}
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
float ff5(global atomic_double *d, double a) {
// CHECK-LABEL: @ff5
- // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
+ // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
@@ -342,3 +342,5 @@ int test_volatile(volatile atomic_int *i) {
}
#endif
+
+// CHECK: [[NOPRIVATE]] = !{i32 5, i32 6}
|
@@ -647,6 +647,14 @@ class LangOptions : public LangOptionsBase { | |||
return ConvergentFunctions; | |||
} | |||
|
|||
/// Return true if atomicrmw operations targeting allocations in private |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want to have a check in target machine to tell if atomic operation on specific address space is legal? I'm thinking of adding atomic support in AAAddressSpace
, and could drop the address space if an atomic operation in an inferred address space is not legal.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This has nothing to do with the target, this is language semantics. What the target natively supports is an incomprehensibly complicated question, and is a legalization issue. We could annotate some operations with this in infer address spaces, but there's only a few cases where it's probably useful there
e403d08
to
1e084c6
Compare
bff0427
to
32ee4c9
Compare
32ee4c9
to
3e97987
Compare
a8852f0
to
39392c6
Compare
ping |
16dd092
to
c3dbcec
Compare
79e1e8b
to
f84e99b
Compare
39392c6
to
a9840f7
Compare
f84e99b
to
29b8b60
Compare
a9840f7
to
32d917d
Compare
fb9d412
to
d7b148d
Compare
d7b148d
to
4a6df18
Compare
Legacy atomic* apis are allowed to not work for private, but not std::atomic style. Base this based on the language builtins, rather than the language directly. Also start handling cmpxchg.
32d917d
to
cb6691a
Compare
No description provided.