From 53bc67c4a690ffdf7445d3d52af03d434f9fd52b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 30 Sep 2024 13:43:53 +0400 Subject: [PATCH 1/2] FastISel: Fix incorrectly using getPointerTy (#110465) This was using the default address space instead of the correct one. Fixes #56055 Keep old method around for ABI compatibility on the release branch. (cherry picked from commit 81ba95cefe1b5a12f0a7d8e6a383bcce9e95b785) --- llvm/include/llvm/CodeGen/FastISel.h | 7 +- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 8 +-- llvm/lib/Target/X86/X86FastISel.cpp | 4 +- llvm/test/CodeGen/X86/issue56055.ll | 81 ++++++++++++++++++++++ 4 files changed, 94 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/X86/issue56055.ll diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 3cbc35400181dd..f91bd692accad8 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -275,7 +275,12 @@ class FastISel { /// This is a wrapper around getRegForValue that also takes care of /// truncating or sign-extending the given getelementptr index value. - Register getRegForGEPIndex(const Value *Idx); + Register getRegForGEPIndex(MVT PtrVT, const Value *Idx); + + /// Retained for ABI compatibility in release branch. + Register getRegForGEPIndex(const Value *Idx) { + return getRegForGEPIndex(TLI.getPointerTy(DL), Idx); + } /// We're checking to see if we can fold \p LI into \p FoldInst. Note /// that we could have a sequence where multiple LLVM IR instructions are diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index ef9f7833551905..246acc7f405837 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -380,14 +380,13 @@ void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) { } } -Register FastISel::getRegForGEPIndex(const Value *Idx) { +Register FastISel::getRegForGEPIndex(MVT PtrVT, const Value *Idx) { Register IdxN = getRegForValue(Idx); if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return Register(); // If the index is smaller or larger than intptr_t, truncate or extend it. - MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); @@ -543,7 +542,8 @@ bool FastISel::selectGetElementPtr(const User *I) { uint64_t TotalOffs = 0; // FIXME: What's a good SWAG number for MaxOffs? uint64_t MaxOffs = 2048; - MVT VT = TLI.getPointerTy(DL); + MVT VT = TLI.getValueType(DL, I->getType()).getSimpleVT(); + for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); @@ -584,7 +584,7 @@ bool FastISel::selectGetElementPtr(const User *I) { // N = N + Idx * ElementSize; uint64_t ElementSize = GTI.getSequentialElementStride(DL); - Register IdxN = getRegForGEPIndex(Idx); + Register IdxN = getRegForGEPIndex(VT, Idx); if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 2eae155956368f..5d594bd54fbfc4 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -902,6 +902,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; unsigned Scale = AM.Scale; + MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT(); + gep_type_iterator GTI = gep_type_begin(U); // Iterate through the indices, folding what we can. Constants can be // folded, and one dynamic index can be handled, if the scale is supported. @@ -937,7 +939,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { (S == 1 || S == 2 || S == 4 || S == 8)) { // Scaled-index addressing. Scale = S; - IndexReg = getRegForGEPIndex(Op); + IndexReg = getRegForGEPIndex(PtrVT, Op); if (IndexReg == 0) return false; break; diff --git a/llvm/test/CodeGen/X86/issue56055.ll b/llvm/test/CodeGen/X86/issue56055.ll new file mode 100644 index 00000000000000..27eaf13e3b00be --- /dev/null +++ b/llvm/test/CodeGen/X86/issue56055.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -fast-isel < %s | FileCheck -check-prefixes=CHECK,FASTISEL %s +; RUN: llc < %s | FileCheck -check-prefixes=CHECK,SDAG %s + +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-windows-msvc" + +define void @issue56055(ptr addrspace(270) %ptr, ptr %out) { +; CHECK-LABEL: issue56055: +; CHECK: # %bb.0: +; CHECK-NEXT: addl $2, %ecx +; CHECK-NEXT: movl %ecx, (%rdx) +; CHECK-NEXT: retq + %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i32 2 + store ptr addrspace(270) %add.ptr, ptr %out + ret void +} + +define void @issue56055_vector(<2 x ptr addrspace(270)> %ptr, ptr %out) { +; CHECK-LABEL: issue56055_vector: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqa (%rcx), %xmm0 +; CHECK-NEXT: paddd __xmm@00000000000000000000000200000002(%rip), %xmm0 +; CHECK-NEXT: movq %xmm0, (%rdx) +; CHECK-NEXT: retq + %add.ptr = getelementptr inbounds i8, <2 x ptr addrspace(270)> %ptr, <2 x i32> + store <2 x ptr addrspace(270)> %add.ptr, ptr %out + ret void +} + +define void @issue56055_small_idx(ptr addrspace(270) %ptr, ptr %out, i16 %idx) { +; CHECK-LABEL: issue56055_small_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: movswl %r8w, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: movl %eax, (%rdx) +; CHECK-NEXT: retq + %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i16 %idx + store ptr addrspace(270) %add.ptr, ptr %out + ret void +} + +define void @issue56055_small_idx_vector(<2 x ptr addrspace(270)> %ptr, ptr %out, <2 x i16> %idx) { +; CHECK-LABEL: issue56055_small_idx_vector: +; CHECK: # %bb.0: +; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,2,1,4,5,6,7] +; CHECK-NEXT: psrad $16, %xmm0 +; CHECK-NEXT: paddd (%rcx), %xmm0 +; CHECK-NEXT: movq %xmm0, (%rdx) +; CHECK-NEXT: retq + %add.ptr = getelementptr inbounds i8, <2 x ptr addrspace(270)> %ptr, <2 x i16> %idx + store <2 x ptr addrspace(270)> %add.ptr, ptr %out + ret void +} + +define void @issue56055_large_idx(ptr addrspace(270) %ptr, ptr %out, i64 %idx) { +; CHECK-LABEL: issue56055_large_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addl %ecx, %r8d +; CHECK-NEXT: movl %r8d, (%rdx) +; CHECK-NEXT: retq + %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i64 %idx + store ptr addrspace(270) %add.ptr, ptr %out + ret void +} + +define void @issue56055_large_idx_vector(<2 x ptr addrspace(270)> %ptr, ptr %out, <2 x i64> %idx) { +; CHECK-LABEL: issue56055_large_idx_vector: +; CHECK: # %bb.0: +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = mem[0,2,2,3] +; CHECK-NEXT: paddd (%rcx), %xmm0 +; CHECK-NEXT: movq %xmm0, (%rdx) +; CHECK-NEXT: retq + %add.ptr = getelementptr inbounds i8, <2 x ptr addrspace(270)> %ptr, <2 x i64> %idx + store <2 x ptr addrspace(270)> %add.ptr, ptr %out + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; FASTISEL: {{.*}} +; SDAG: {{.*}} From b29c0e2096b58ac500529aae43611b376c033ead Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Oct 2024 16:15:40 +0400 Subject: [PATCH 2/2] Move out of line --- llvm/include/llvm/CodeGen/FastISel.h | 4 +--- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 4 ++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index f91bd692accad8..95e8004cc09c7a 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -278,9 +278,7 @@ class FastISel { Register getRegForGEPIndex(MVT PtrVT, const Value *Idx); /// Retained for ABI compatibility in release branch. - Register getRegForGEPIndex(const Value *Idx) { - return getRegForGEPIndex(TLI.getPointerTy(DL), Idx); - } + Register getRegForGEPIndex(const Value *Idx); /// We're checking to see if we can fold \p LI into \p FoldInst. Note /// that we could have a sequence where multiple LLVM IR instructions are diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 246acc7f405837..398381a8164b2b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -397,6 +397,10 @@ Register FastISel::getRegForGEPIndex(MVT PtrVT, const Value *Idx) { return IdxN; } +Register FastISel::getRegForGEPIndex(const Value *Idx) { + return getRegForGEPIndex(TLI.getPointerTy(DL), Idx); +} + void FastISel::recomputeInsertPt() { if (getLastLocalValue()) { FuncInfo.InsertPt = getLastLocalValue();