Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics #102599

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8047,6 +8047,43 @@ it will contain a list of ids, including the ids of the callsites in the
full inline sequence, in order from the leaf-most call's id to the outermost
inlined call.


'``noalias.addrspace``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``noalias.addrspace`` metadata is used to identify memory
operations which cannot access objects allocated in a range of address
spaces. It is attached to memory instructions, including
:ref:`load <i_load>`, :ref:`store <i_store>`,
:ref:`atomicrmw <i_atomicrmw>`, :ref:`cmpxchg <i_cmpxchg>`, and
:ref:`call <i_call>` instructions.

This follows the same form as :ref:`range metadata <range-metadata>`,
except the field entries must be of type ``i32``. The entries are
interpreted as the same numeric address spaces that apply to IR values.

Example:

.. code-block:: llvm

    ; %ptr cannot point to an object allocated in addrspace(5)
    %rmw.valid = atomicrmw and ptr %ptr, i64 %value seq_cst, !noalias.addrspace !0

    ; Undefined behavior. The underlying object is allocated in one of the listed
    ; address spaces.
    %alloca = alloca i64, addrspace(5)
    %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
    %rmw.ub = atomicrmw and ptr %alloca.cast, i64 %value seq_cst, !noalias.addrspace !0

    !0 = !{i32 5, i32 6} ; Exclude addrspace(5) only


This is intended for use on targets with a notion of generic address
spaces, which at runtime resolve to different physical memory
spaces. The interpretation of the address space values is target
specific. The behavior is undefined if the runtime memory address does
resolve to an object defined in one of the indicated address spaces.


Module Flags Metadata
=====================

Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ Changes to the LLVM IR

* Added `usub_cond` and `usub_sat` operations to `atomicrmw`.

* Introduced `noalias.addrspace` metadata.

* Remove the following intrinsics which can be replaced with a `bitcast`:

* `llvm.nvvm.bitcast.f2i`
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/FixedMetadataKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37)
LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
13 changes: 12 additions & 1 deletion llvm/lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
Expand Down Expand Up @@ -4270,13 +4272,22 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
AtomicRMWInst *RMW =
Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

if (PtrTy->getAddressSpace() != 3) {
unsigned AddrSpace = PtrTy->getAddressSpace();
if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
MDNode *EmptyMD = MDNode::get(F->getContext(), {});
RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
}

if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
MDBuilder MDB(F->getContext());
MDNode *RangeNotPrivate =
MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
}

if (IsVolatile)
RMW->setVolatile(true);

Expand Down
53 changes: 43 additions & 10 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,14 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// Whether a metadata node is allowed to be, or contain, a DILocation.
enum class AreDebugLocsAllowed { No, Yes };

/// Metadata that should be treated as a range, with slightly different
/// requirements.
///
/// The kind is passed to verifyRangeLikeMetadata to select the per-kind rules:
/// which type the range entries must have, and whether a range covering the
/// full set is accepted.
enum class RangeLikeMetadataKind {
Range, // MD_range: entries must match the value's scalar type
AbsoluteSymbol, // MD_absolute_symbol: like Range, but the full set is allowed
NoaliasAddrspace // MD_noalias_addrspace: entries must be i32
};

// Verification methods...
void visitGlobalValue(const GlobalValue &GV);
void visitGlobalVariable(const GlobalVariable &GV);
Expand All @@ -515,9 +523,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
void visitModuleFlagCGProfileEntry(const MDOperand &MDO);
void visitFunction(const Function &F);
void visitBasicBlock(BasicBlock &BB);
void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty,
bool IsAbsoluteSymbol);
void verifyRangeLikeMetadata(const Value &V, const MDNode *Range, Type *Ty,
RangeLikeMetadataKind Kind);
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
void visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, Type *Ty);
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
void visitProfMetadata(Instruction &I, MDNode *MD);
void visitCallStackMetadata(MDNode *MD);
Expand Down Expand Up @@ -760,8 +769,9 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
// FIXME: Why is getMetadata on GlobalValue protected?
if (const MDNode *AbsoluteSymbol =
GO->getMetadata(LLVMContext::MD_absolute_symbol)) {
verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()),
true);
verifyRangeLikeMetadata(*GO, AbsoluteSymbol,
DL.getIntPtrType(GO->getType()),
RangeLikeMetadataKind::AbsoluteSymbol);
}
}

Expand Down Expand Up @@ -4136,8 +4146,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {

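/// Verify !range, !absolute_symbol and !noalias.addrspace metadata. These have
/// the same restrictions, except !absolute_symbol allows the full set and
/// !noalias.addrspace requires i32 entries instead of the instruction type.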
void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
Type *Ty, bool IsAbsoluteSymbol) {
void Verifier::verifyRangeLikeMetadata(const Value &I, const MDNode *Range,
Type *Ty, RangeLikeMetadataKind Kind) {
unsigned NumOperands = Range->getNumOperands();
Check(NumOperands % 2 == 0, "Unfinished range!", Range);
unsigned NumRanges = NumOperands / 2;
Expand All @@ -4154,8 +4164,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,

Check(High->getType() == Low->getType(), "Range pair types must match!",
&I);
Check(High->getType() == Ty->getScalarType(),
"Range types must match instruction type!", &I);

if (Kind == RangeLikeMetadataKind::NoaliasAddrspace) {
Check(High->getType()->isIntegerTy(32),
"noalias.addrspace type must be i32!", &I);
} else {
Check(High->getType() == Ty->getScalarType(),
"Range types must match instruction type!", &I);
}

APInt HighV = High->getValue();
APInt LowV = Low->getValue();
Expand All @@ -4166,7 +4182,9 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
"The upper and lower limits cannot be the same value", &I);

ConstantRange CurRange(LowV, HighV);
Check(!CurRange.isEmptySet() && (IsAbsoluteSymbol || !CurRange.isFullSet()),
Check(!CurRange.isEmptySet() &&
(Kind == RangeLikeMetadataKind::AbsoluteSymbol ||
!CurRange.isFullSet()),
"Range must not be empty!", Range);
if (i != 0) {
Check(CurRange.intersectWith(LastRange).isEmptySet(),
Expand Down Expand Up @@ -4194,7 +4212,15 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
/// Verify the !range attachment on instruction \p I.
///
/// \param I     Instruction carrying the metadata.
/// \param Range The node attached to \p I under MD_range; must be non-null
///              (asserted).
/// \param Ty    Type the range entries are checked against.
///
/// The checks themselves live in verifyRangeLikeMetadata; this wrapper only
/// selects the plain Range kind. The node is verified exactly once, through the
/// kind-based entry point.
void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
  assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
         "precondition violation");
  verifyRangeLikeMetadata(I, Range, Ty, RangeLikeMetadataKind::Range);
}

/// Verify the !noalias.addrspace attachment on instruction \p I.
///
/// \p Range must be non-null and must be the node attached to \p I under
/// MD_noalias_addrspace (asserted). The checks are delegated to
/// verifyRangeLikeMetadata with the NoaliasAddrspace kind; the entry-type check
/// for that kind requires i32 rather than a match with \p Ty.
void Verifier::visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range,
Type *Ty) {
assert(Range && Range == I.getMetadata(LLVMContext::MD_noalias_addrspace) &&
"precondition violation");
verifyRangeLikeMetadata(I, Range, Ty,
RangeLikeMetadataKind::NoaliasAddrspace);
}

void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
Expand Down Expand Up @@ -5187,6 +5213,13 @@ void Verifier::visitInstruction(Instruction &I) {
visitRangeMetadata(I, Range, I.getType());
}

if (MDNode *Range = I.getMetadata(LLVMContext::MD_noalias_addrspace)) {
Check(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicRMWInst>(I) ||
isa<AtomicCmpXchgInst>(I) || isa<CallInst>(I),
"noalias.addrspace are only for memory operations!", &I);
visitNoaliasAddrspaceMetadata(I, Range, I.getType());
}

if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
Check(isa<LoadInst>(I) || isa<StoreInst>(I),
"invariant.group metadata is only for loads and stores", &I);
Expand Down
110 changes: 110 additions & 0 deletions llvm/test/Assembler/noalias-addrspace-md.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s

; atomicrmw carrying !0 = {0, 1}, i.e. address space 0 only. The attachment
; must survive the llvm-as | llvm-dis round trip unchanged.
define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) {
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]]
; CHECK-NEXT: ret i64 [[RET]]
;
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0
ret i64 %ret
}

; atomicrmw carrying !1 = {0, 2}, a single range spanning address spaces 0
; and 1. The attachment must survive the round trip unchanged.
define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) {
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]]
; CHECK-NEXT: ret i64 [[RET]]
;
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1
ret i64 %ret
}

; atomicrmw carrying !2 = {1, 3}, a range whose lower bound is not zero
; (address spaces 1 and 2). The attachment must survive the round trip.
define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) {
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]]
; CHECK-NEXT: ret i64 [[RET]]
;
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2
ret i64 %ret
}

; atomicrmw carrying !3 = {4, 6, 10, 55}: two disjoint pairs in one node.
; Both pairs must be preserved, in order, across the round trip.
define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) {
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]]
; CHECK-NEXT: ret i64 [[RET]]
;
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
ret i64 %ret
}

; Plain load carrying !4 = {5, 6} (address space 5 only). Shows the metadata
; is accepted on load and preserved across the round trip.
define i64 @load_noalias_addrspace__5_6(ptr %ptr) {
; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]]
; CHECK-NEXT: ret i64 [[RET]]
;
%ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4
ret i64 %ret
}

; Plain store carrying !4 = {5, 6}. Shows the metadata is accepted on store
; and that the printed node is the same one used by the load test.
define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) {
; CHECK-LABEL: define void @store_noalias_addrspace__5_6(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
; CHECK-NEXT: store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]]
; CHECK-NEXT: ret void
;
store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4
ret void
}

; cmpxchg carrying !4 = {5, 6}. Shows the metadata is accepted on cmpxchg and
; preserved across the round trip.
define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) {
; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) {
; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]]
; CHECK-NEXT: ret { i64, i1 } [[RET]]
;
%ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4
ret { i64, i1 } %ret
}

declare void @foo()

; Call to an ordinary declared function (@foo) carrying !4 = {5, 6}. Shows the
; metadata is accepted on a non-intrinsic call and preserved.
define void @call_noalias_addrspace__5_6(ptr %ptr) {
; CHECK-LABEL: define void @call_noalias_addrspace__5_6(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: call void @foo(), !noalias.addrspace [[META4]]
; CHECK-NEXT: ret void
;
call void @foo(), !noalias.addrspace !4
ret void
}

; Call to the llvm.memcpy intrinsic carrying !4 = {5, 6}. Shows the metadata
; is accepted on a memory intrinsic call and preserved.
define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) {
; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]]
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4
ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0

attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }

!0 = !{i32 0, i32 1}
!1 = !{i32 0, i32 2}
!2 = !{i32 1, i32 3}
!3 = !{i32 4, i32 6, i32 10, i32 55}
!4 = !{i32 5, i32 6}
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1]] = !{i32 0, i32 2}
; CHECK: [[META2]] = !{i32 1, i32 3}
; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55}
; CHECK: [[META4]] = !{i32 5, i32 6}
;.
Loading
Loading