Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (#110714)" v2 #111708

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 42 additions & 19 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;

const unsigned Size = Ty.getSizeInBits();
if (Ty.isPointerVector())
return true;
if (Size <= 64)
return false;
// Address space 8 pointers get their own workaround.
Expand All @@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
if (!Ty.isVector())
return true;

if (Ty.isPointerVector())
return true;

unsigned EltSize = Ty.getScalarSizeInBits();
return EltSize != 32 && EltSize != 64;
}
Expand Down Expand Up @@ -5818,8 +5817,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
return Reg;
}

Register AMDGPULegalizerInfo::fixStoreSourceType(
MachineIRBuilder &B, Register VData, bool IsFormat) const {
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
Register VData, LLT MemTy,
bool IsFormat) const {
MachineRegisterInfo *MRI = B.getMRI();
LLT Ty = MRI->getType(VData);

Expand All @@ -5829,6 +5829,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
if (hasBufferRsrcWorkaround(Ty))
return castBufferRsrcToV4I32(VData, B);

if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
Ty = getBitcastRegisterType(Ty);
VData = B.buildBitcast(Ty, VData).getReg(0);
}
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
Expand All @@ -5846,22 +5850,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
}

bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
LegalizerHelper &Helper,
bool IsTyped,
bool IsFormat) const {
MachineIRBuilder &B = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *B.getMRI();

Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const LLT S32 = LLT::scalar(32);

VData = fixStoreSourceType(B, VData, IsFormat);
castBufferRsrcArgToV4I32(MI, B, 2);
Register RSrc = MI.getOperand(2).getReg();

MachineMemOperand *MMO = *MI.memoperands_begin();
const int MemSize = MMO->getSize().getValue();
LLT MemTy = MMO->getMemoryType();

VData = fixStoreSourceType(B, VData, MemTy, IsFormat);

castBufferRsrcArgToV4I32(MI, B, 2);
Register RSrc = MI.getOperand(2).getReg();

unsigned ImmOffset;

Expand Down Expand Up @@ -5954,10 +5962,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
}

bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
LegalizerHelper &Helper,
bool IsFormat,
bool IsTyped) const {
MachineIRBuilder &B = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *B.getMRI();
GISelChangeObserver &Observer = Helper.Observer;

// FIXME: Verifier should enforce 1 MMO for these intrinsics.
MachineMemOperand *MMO = *MI.memoperands_begin();
const LLT MemTy = MMO->getMemoryType();
Expand Down Expand Up @@ -6006,9 +6017,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
// logic doesn't have to handle that case.
if (hasBufferRsrcWorkaround(Ty)) {
Observer.changingInstr(MI);
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
Observer.changedInstr(MI);
Dst = MI.getOperand(0).getReg();
B.setInsertPt(B.getMBB(), MI);
}
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
Ty = getBitcastRegisterType(Ty);
Observer.changingInstr(MI);
Helper.bitcastDst(MI, Ty, 0);
Observer.changedInstr(MI);
Dst = MI.getOperand(0).getReg();
B.setInsertPt(B.getMBB(), MI);
}

LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const bool Unpacked = ST.hasUnpackedD16VMem();
Expand Down Expand Up @@ -7388,17 +7411,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
return legalizeBufferStore(MI, MRI, B, false, false);
return legalizeBufferStore(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
return legalizeBufferStore(MI, MRI, B, false, true);
return legalizeBufferStore(MI, Helper, false, true);
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
return legalizeBufferStore(MI, MRI, B, true, true);
return legalizeBufferStore(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
Expand All @@ -7407,17 +7430,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_atomic_buffer_load:
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
return legalizeBufferLoad(MI, MRI, B, false, false);
return legalizeBufferLoad(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
return legalizeBufferLoad(MI, MRI, B, true, false);
return legalizeBufferLoad(MI, Helper, true, false);
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
return legalizeBufferLoad(MI, MRI, B, true, true);
return legalizeBufferLoad(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
Expand Down
12 changes: 5 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {

Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg, bool ImageStore = false) const;
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
bool IsFormat) const;

bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsTyped,
bool IsFormat) const;
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsFormat,
bool IsTyped) const;
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
bool IsTyped, bool IsFormat) const;
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
bool IsFormat, bool IsTyped) const;
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
Intrinsic::ID IID) const;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ class RegisterTypes<list<ValueType> reg_types> {

def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;

Expand Down
Loading
Loading