Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw #102462

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions clang/include/clang/AST/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -6777,6 +6777,17 @@ class AtomicExpr : public Expr {
getOp() <= AO__opencl_atomic_store;
}

bool isHIP() const {
return Op >= AO__hip_atomic_compare_exchange_strong &&
Op <= AO__hip_atomic_store;
}

/// Return true if atomic operations targeting allocations in private memory
/// are undefined.
bool threadPrivateMemoryAtomicsAreUndefined() const {
  // Both the HIP and OpenCL atomic builtin families carry this guarantee.
  return isHIP() || isOpenCL();
}

SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
SourceLocation getRParenLoc() const { return RParenLoc; }

Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/LangOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,14 @@ class LangOptions : public LangOptionsBase {
return ConvergentFunctions;
}

/// Return true if atomicrmw operations targeting allocations in private
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to have a check in target machine to tell if atomic operation on specific address space is legal? I'm thinking of adding atomic support in AAAddressSpace, and could drop the address space if an atomic operation in an inferred address space is not legal.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has nothing to do with the target, this is language semantics. What the target natively supports is an incomprehensibly complicated question, and is a legalization issue. We could annotate some operations with this in infer address spaces, but there's only a few cases where it's probably useful there

/// memory are undefined.
bool threadPrivateMemoryAtomicsAreUndefined() const {
// Should be false for OpenMP.
// TODO: Should this be true for SYCL?
arsenm marked this conversation as resolved.
Show resolved Hide resolved
return OpenCL || CUDA;
arsenm marked this conversation as resolved.
Show resolved Hide resolved
}

/// Return the OpenCL C or C++ version as a VersionTuple.
VersionTuple getOpenCLVersionTuple() const;

Expand Down
9 changes: 5 additions & 4 deletions clang/lib/CodeGen/CGAtomic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
Pair->setVolatile(E->isVolatile());
Pair->setWeak(IsWeak);
CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Pair, E);

// Cmp holds the result of the compare-exchange operation: true on success,
// false on failure.
Expand Down Expand Up @@ -727,7 +728,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,

llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::AtomicRMWInst *RMWI =
CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);
CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope, E);
RMWI->setVolatile(E->isVolatile());

// For __atomic_*_fetch operations, perform the operation again to
Expand Down Expand Up @@ -2048,11 +2049,11 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
/// Emit an atomicrmw instruction and let the target attach any
/// target-specific metadata to it.
///
/// \p AE, if non-null, is the source-level atomic expression the instruction
/// was generated from; targets may use its language semantics (e.g. to decide
/// whether private-memory atomics are undefined).
//
// NOTE(review): the scraped diff mashed the pre-image signature/call lines
// into this body; this is the reconstructed post-diff function.
llvm::AtomicRMWInst *
CodeGenFunction::emitAtomicRMWInst(llvm::AtomicRMWInst::BinOp Op, Address Addr,
                                   llvm::Value *Val, llvm::AtomicOrdering Order,
                                   llvm::SyncScope::ID SSID,
                                   const AtomicExpr *AE) {
  llvm::AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);
  getTargetHooks().setTargetAtomicMetadata(*this, *RMW, AE);
  return RMW;
}

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4166,7 +4166,8 @@ class CodeGenFunction : public CodeGenTypeCache {
/// Emit an atomicrmw instruction, applying target-specific metadata via the
/// target hooks. \p AE, if non-null, is the source-level AtomicExpr this
/// instruction is generated from — TODO confirm all callers thread it through.
llvm::AtomicRMWInst *emitAtomicRMWInst(
    llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
    llvm::AtomicOrdering Order = llvm::AtomicOrdering::SequentiallyConsistent,
    llvm::SyncScope::ID SSID = llvm::SyncScope::System,
    const AtomicExpr *AE = nullptr);

void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CodeGen/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,9 @@ class TargetCodeGenInfo {

/// Allow the target to apply other metadata to an atomic instruction
virtual void setTargetAtomicMetadata(CodeGenFunction &CGF,
llvm::AtomicRMWInst &RMW) const {}
llvm::Instruction &AtomicInst,
const AtomicExpr *Expr = nullptr) const {
}

/// Interface class for filling custom fields of a block literal for OpenCL.
class TargetOpenCLBlockHelper {
Expand Down
36 changes: 29 additions & 7 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

using namespace clang;
using namespace clang::CodeGen;
Expand Down Expand Up @@ -312,7 +313,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const override;
void setTargetAtomicMetadata(CodeGenFunction &CGF,
llvm::AtomicRMWInst &RMW) const override;
llvm::Instruction &AtomicInst,
const AtomicExpr *Expr = nullptr) const override;
llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Type *BlockTy) const override;
Expand Down Expand Up @@ -549,19 +551,39 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
}

void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
CodeGenFunction &CGF, llvm::Instruction &AtomicInst,
const AtomicExpr *AE) const {
auto *RMW = dyn_cast<llvm::AtomicRMWInst>(&AtomicInst);
auto *CmpX = dyn_cast<llvm::AtomicCmpXchgInst>(&AtomicInst);

// OpenCL and old style HIP atomics consider atomics targeting thread private
// memory to be undefined.
//
// TODO: This is probably undefined for atomic load/store, but there's not
// much direct codegen benefit to knowing this.
if (((RMW && RMW->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS) ||
(CmpX &&
CmpX->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS)) &&
AE && AE->threadPrivateMemoryAtomicsAreUndefined()) {
llvm::MDBuilder MDHelper(CGF.getLLVMContext());
llvm::MDNode *ASRange = MDHelper.createRange(
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
}

if (!RMW || !CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
return;

// TODO: Introduce new, more controlled options that also work for integers,
// and deprecate allowAMDGPUUnsafeFPAtomics.
llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation();
llvm::AtomicRMWInst::BinOp RMWOp = RMW->getOperation();
if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty);
RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);

if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy())
RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW->getType()->isFloatTy())
RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
}
}

Expand Down
Loading
Loading