Skip to content

Commit

Permalink
clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Aug 8, 2024
1 parent e403d08 commit a8852f0
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 64 deletions.
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/LangOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,14 @@ class LangOptions : public LangOptionsBase {
return ConvergentFunctions;
}

/// Return true if atomicrmw operations targeting allocations in private
/// memory are undefined.
bool threadPrivateMemoryAtomicsAreUndefined() const {
// Should be false for OpenMP.
// TODO: Should this be true for SYCL?
return OpenCL || CUDA;
}

/// Return the OpenCL C or C++ version as a VersionTuple.
VersionTuple getOpenCLVersionTuple() const;

Expand Down
11 changes: 11 additions & 0 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

using namespace clang;
using namespace clang::CodeGen;
Expand Down Expand Up @@ -550,6 +551,16 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,

void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {

if (RMW.getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS &&
CGF.CGM.getLangOpts().threadPrivateMemoryAtomicsAreUndefined()) {
llvm::MDBuilder MDHelper(CGF.getLLVMContext());
llvm::MDNode *ASRange = MDHelper.createRange(
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
RMW.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
}

if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
return;

Expand Down
115 changes: 59 additions & 56 deletions clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@

__global__ void ffp1(float *p) {
// CHECK-LABEL: @_Z4ffp1Pf
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE:[0-9]+]]{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE:[0-9]+]], !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE: _Z4ffp1Pf
// SAFE: global_atomic_cmpswap
Expand Down Expand Up @@ -62,19 +62,19 @@ __global__ void ffp1(float *p) {

__global__ void ffp2(double *p) {
// CHECK-LABEL: @_Z4ffp2Pd
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE-LABEL: @_Z4ffp2Pd
// SAFE: global_atomic_cmpswap_b64
Expand Down Expand Up @@ -102,19 +102,19 @@ __global__ void ffp2(double *p) {
// long double is the same as double for amdgcn.
__global__ void ffp3(long double *p) {
// CHECK-LABEL: @_Z4ffp3Pe
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE-LABEL: @_Z4ffp3Pe
// SAFE: global_atomic_cmpswap_b64
Expand All @@ -139,34 +139,34 @@ __global__ void ffp3(long double *p) {
__device__ double ffp4(double *p, float f) {
// CHECK-LABEL: @_Z4ffp4Pdf
// CHECK: fpext float {{.*}} to double
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
return __atomic_fetch_sub(p, f, memory_order_relaxed);
}

__device__ double ffp5(double *p, int i) {
// CHECK-LABEL: @_Z4ffp5Pdi
// CHECK: sitofp i32 {{.*}} to double
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
return __atomic_fetch_sub(p, i, memory_order_relaxed);
}

__global__ void ffp6(_Float16 *p) {
// CHECK-LABEL: @_Z4ffp6PDF16
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE: _Z4ffp6PDF16
// SAFE: global_atomic_cmpswap
Expand All @@ -190,3 +190,6 @@ __global__ void ffp6(_Float16 *p) {
__hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
__hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
}

// SAFEIR: ![[NO_PRIVATE]] = !{i32 5, i32 6}
// UNSAFEIR: ![[NO_PRIVATE]] = !{i32 5, i32 6}
18 changes: 10 additions & 8 deletions clang/test/CodeGenOpenCL/atomic-ops.cl
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,19 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *

void fi3(atomic_int *i, atomic_uint *ui) {
// CHECK-LABEL: @fi3
// CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
// CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
// CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
// CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
// CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
// CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}

Expand Down Expand Up @@ -186,19 +186,19 @@ void ff2(atomic_float *d) {

float ff3(atomic_float *d) {
// CHECK-LABEL: @ff3
// CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4
// CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[NOPRIVATE:![0-9]+]]{{$}}
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}

float ff4(global atomic_float *d, float a) {
// CHECK-LABEL: @ff4
// CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
// CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}}
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

float ff5(global atomic_double *d, double a) {
// CHECK-LABEL: @ff5
// CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
// CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

Expand Down Expand Up @@ -342,3 +342,5 @@ int test_volatile(volatile atomic_int *i) {
}

#endif

// CHECK: [[NOPRIVATE]] = !{i32 5, i32 6}

0 comments on commit a8852f0

Please sign in to comment.