From 1f681b5913f9169332e4126ac0aafeedc5ca6cef Mon Sep 17 00:00:00 2001
From: Joe Faulls <67795994+joe-img@users.noreply.github.com>
Date: Thu, 12 Sep 2024 14:01:55 +0100
Subject: [PATCH] [CodeGen] Clear InitUndef pass new register cache between pass runs (#90967)

Multiple invocations of the pass could interfere with each other,
preventing some undefs from being initialised. I found it very
difficult to create a unit test for this, because the failure depends
on the particular register allocations made for a previous function.
However, the bug can be observed at https://godbolt.org/z/7xnMo41Gv,
where it results in the illegal instruction `vnsrl.wi v9, v8, 0`.
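
For context, this is the usual hazard for per-function state kept in a
MachineFunctionPass member: the same pass object is reused for every
function in the module, so anything cached in a member survives into the
next runOnMachineFunction call unless it is explicitly reset. A minimal
sketch of the pattern follows; the pass name and the container type are
illustrative only, not the actual InitUndef members:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"
    #include "llvm/CodeGen/Register.h"

    using namespace llvm;

    namespace {
    // Illustrative pass only; InitUndef keeps analogous per-function caches.
    struct ExampleInitPass : MachineFunctionPass {
      static char ID;
      ExampleInitPass() : MachineFunctionPass(ID) {}

      // Per-function cache of registers created by the pass. The pass object
      // itself outlives any single function, so entries recorded for one
      // function remain visible when the next one is processed.
      SmallVector<Register, 8> NewRegs;

      bool runOnMachineFunction(MachineFunction &MF) override {
        bool Changed = false;
        // ... rewrite undef uses, appending freshly created virtual
        // registers to NewRegs and setting Changed ...
        NewRegs.clear(); // reset per-function state, mirroring DeadInsts.clear()
        return Changed;
      }
    };
    } // end anonymous namespace

    char ExampleInitPass::ID = 0;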
---
 llvm/lib/CodeGen/InitUndef.cpp                |  1 +
 .../rvv/fixed-vectors-buildvec-of-binop.ll    | 13 ++-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  | 96 +++++++++----------
 .../test/CodeGen/RISCV/rvv/shuffle-reverse.ll | 60 ++++++------
 4 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp
index 51c50ff872ef21..8f25ede0eb2b7e 100644
--- a/llvm/lib/CodeGen/InitUndef.cpp
+++ b/llvm/lib/CodeGen/InitUndef.cpp
@@ -272,6 +272,7 @@ bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
   for (auto *DeadMI : DeadInsts)
     DeadMI->eraseFromParent();
   DeadInsts.clear();
+  NewRegs.clear();
   return Changed;
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 65a1035fd815c0..5d75efe681af73 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -567,16 +567,15 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b,
 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 5
 ; CHECK-NEXT: vmv.s.x v10, a2
-; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v12, (a0)
 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 6
-; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0)
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 7
-; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vmv.s.x v12, a3
+; CHECK-NEXT: vslideup.vi v8, v12, 7
+; CHECK-NEXT: vadd.vv v8, v8, v10
 ; CHECK-NEXT: ret
   %vadd = add <8 x i32> %vin,
   %e0 = add i32 %a, 23
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 14f4f44049c533..24a5bd154c64f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2501,9 +2501,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2546,9 +2546,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
 ; RV64ZVE32F-NEXT: j .LBB35_9
@@ -2652,9 +2652,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2697,9 +2697,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
 ; RV64ZVE32F-NEXT: j .LBB36_9
@@ -2808,9 +2808,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2856,9 +2856,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8
 ; RV64ZVE32F-NEXT: j .LBB37_9
@@ -2966,9 +2966,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3011,9 +3011,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
 ; RV64ZVE32F-NEXT: j .LBB38_9
@@ -3118,9 +3118,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3163,9 +3163,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
 ; RV64ZVE32F-NEXT: j .LBB39_9
@@ -3275,9 +3275,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT: add a3, a0, a3
 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a3
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14
 ; RV64ZVE32F-NEXT: andi a3, a2, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3323,9 +3323,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT: add a3, a0, a3
 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a3
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a3, a2, 32
 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_8
 ; RV64ZVE32F-NEXT: j .LBB40_9
@@ -8200,9 +8200,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB74_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -8245,9 +8245,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
 ; RV64ZVE32F-NEXT: j .LBB74_9
@@ -8351,9 +8351,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB75_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -8396,9 +8396,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_8
 ; RV64ZVE32F-NEXT: j .LBB75_9
@@ -8507,9 +8507,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB76_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -8555,9 +8555,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_8
 ; RV64ZVE32F-NEXT: j .LBB76_9
@@ -8665,9 +8665,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB77_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -8710,9 +8710,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_8
 ; RV64ZVE32F-NEXT: j .LBB77_9
@@ -8817,9 +8817,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB78_9: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -8862,9 +8862,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
 ; RV64ZVE32F-NEXT: j .LBB78_9
@@ -8974,9 +8974,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT: add a3, a0, a3
 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT: .LBB79_9: # %else14
 ; RV64ZVE32F-NEXT: andi a3, a2, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -9022,9 +9022,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT: add a3, a0, a3
 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT: andi a3, a2, 32
 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_8
 ; RV64ZVE32F-NEXT: j .LBB79_9
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index 368f454fa5fda1..d6d57eef47e387 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -234,10 +234,10 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
 ; CHECK-NEXT: addi a1, a1, %lo(.LCPI15_0)
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vle16.v v16, (a1)
-; CHECK-NEXT: vmv2r.v v20, v10
+; CHECK-NEXT: vle16.v v20, (a1)
+; CHECK-NEXT: vmv2r.v v16, v10
 ; CHECK-NEXT: vmv2r.v v12, v8
-; CHECK-NEXT: vrgather.vv v8, v12, v16
+; CHECK-NEXT: vrgather.vv v8, v12, v20
 ; CHECK-NEXT: vid.v v12
 ; CHECK-NEXT: vrsub.vi v12, v12, 15
 ; CHECK-NEXT: lui a0, 16
@@ -245,7 +245,7 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vrgather.vv v8, v20, v12, v0.t
+; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t
 ; CHECK-NEXT: ret
   %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32>
   ret <32 x i16> %v32i16
@@ -296,12 +296,12 @@ define <8 x i32> @v4i32_2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmv1r.v v12, v9
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vi v13, v9, 7
+; CHECK-NEXT: vid.v v13
+; CHECK-NEXT: vrsub.vi v14, v13, 7
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v10, v8, v13
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v14
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v9, 3
+; CHECK-NEXT: vrsub.vi v8, v13, 3
 ; CHECK-NEXT: vmv.v.i v0, 15
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT: vrgatherei16.vv v10, v12, v8, v0.t
@@ -330,12 +330,12 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmv2r.v v16, v10
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v18, v10, 15
+; CHECK-NEXT: vid.v v18
+; CHECK-NEXT: vrsub.vi v20, v18, 15
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v18
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v20
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 7
+; CHECK-NEXT: vrsub.vi v8, v18, 7
 ; CHECK-NEXT: li a0, 255
 ; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
@@ -367,10 +367,10 @@ define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
 ; CHECK-NEXT: addi a1, a1, %lo(.LCPI23_0)
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vle16.v v20, (a1)
+; CHECK-NEXT: vle16.v v28, (a1)
 ; CHECK-NEXT: vmv4r.v v24, v12
 ; CHECK-NEXT: vmv4r.v v16, v8
-; CHECK-NEXT: vrgatherei16.vv v8, v16, v20
+; CHECK-NEXT: vrgatherei16.vv v8, v16, v28
 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT: vid.v v16
 ; CHECK-NEXT: vrsub.vi v16, v16, 15
@@ -430,12 +430,12 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmv2r.v v16, v10
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v11, v10, 7
+; CHECK-NEXT: vid.v v18
+; CHECK-NEXT: vrsub.vi v19, v18, 7
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v11
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v19
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 3
+; CHECK-NEXT: vrsub.vi v8, v18, 3
 ; CHECK-NEXT: vmv.v.i v0, 15
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
@@ -605,12 +605,12 @@ define <8 x float> @v4f32_2(<4 x float> %a, <4 x float> %b) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmv1r.v v12, v9
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vi v13, v9, 7
+; CHECK-NEXT: vid.v v13
+; CHECK-NEXT: vrsub.vi v14, v13, 7
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v10, v8, v13
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v14
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v9, 3
+; CHECK-NEXT: vrsub.vi v8, v13, 3
 ; CHECK-NEXT: vmv.v.i v0, 15
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT: vrgatherei16.vv v10, v12, v8, v0.t
@@ -639,12 +639,12 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmv2r.v v16, v10
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v18, v10, 15
+; CHECK-NEXT: vid.v v18
+; CHECK-NEXT: vrsub.vi v20, v18, 15
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v18
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v20
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 7
+; CHECK-NEXT: vrsub.vi v8, v18, 7
 ; CHECK-NEXT: li a0, 255
 ; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
@@ -701,12 +701,12 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmv2r.v v16, v10
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v11, v10, 7
+; CHECK-NEXT: vid.v v18
+; CHECK-NEXT: vrsub.vi v19, v18, 7
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v11
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v19
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 3
+; CHECK-NEXT: vrsub.vi v8, v18, 3
 ; CHECK-NEXT: vmv.v.i v0, 15
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t