Skip to content

Commit

Permalink
[X86] Support vectorized llvm.fmaximum/fminimum.vXf16 lowering (#120988)
Browse files Browse the repository at this point in the history
Support the lowering of vectorized FMINIMUM and FMAXIMUM to vminph and
vmaxph on types v8f16 and v16f16 when both the AVX512FP16 and AVX512VL
features are present, and on type v32f16 when AVX512FP16 is present.
  • Loading branch information
xilinbai-intel authored Dec 25, 2024
1 parent 676b48d commit 7226b39
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 25 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,6 +2333,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);

setOperationAction(ISD::FMINIMUM, MVT::v32f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::v32f16, Custom);
}

if (Subtarget.hasVLX()) {
Expand Down Expand Up @@ -2377,6 +2380,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Need to custom widen these to prevent scalarization.
setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
setOperationAction(ISD::STORE, MVT::v4f16, Custom);

setOperationAction(ISD::FMINIMUM, MVT::v8f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Custom);

setOperationAction(ISD::FMINIMUM, MVT::v16f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::v16f16, Custom);
}
}

Expand Down
82 changes: 57 additions & 25 deletions llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ declare half @llvm.minimum.f16(half, half)
declare half @llvm.maximum.f16(half, half)
declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
declare <16 x half> @llvm.minimum.v16f16(<16 x half>, <16 x half>)
declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)
declare <32 x half> @llvm.minimum.v32f16(<32 x half>, <32 x half>)
declare <32 x half> @llvm.maximum.v32f16(<32 x half>, <32 x half>)

define half @test_fminimum(half %x, half %y) {
; CHECK-LABEL: test_fminimum:
Expand All @@ -25,20 +29,10 @@ define half @test_fminimum(half %x, half %y) {
ret half %z
}

define <8 x half> @test_fminimum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_scalarize:
define <8 x half> @test_fminimum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpltph %xmm1, %xmm0, %k1
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768]
; CHECK-NEXT: vpcmpeqw %xmm3, %xmm0, %k1
; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
; CHECK-NEXT: vpcmpeqw %xmm3, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
Expand Down Expand Up @@ -113,19 +107,10 @@ define half @test_fmaximum(half %x, half %y) {
ret half %r
}

define <8 x half> @test_fmaximum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_scalarize:
define <8 x half> @test_fmaximum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpltph %xmm0, %xmm1, %k1
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
Expand Down Expand Up @@ -186,3 +171,50 @@ define half @test_fmaximum_combine_cmps(half %x, half %y) {
%2 = tail call half @llvm.maximum.f16(half %x, half %1)
ret half %2
}

; 256-bit (<16 x half>) llvm.minimum with both "no-nans-fp-math" and
; "no-signed-zeros-fp-math" set on the function. The expected assembly is a
; single vminph on ymm registers followed by the return, i.e. no additional
; compare/blend fixup instructions are emitted around the min.
define <16 x half> @test_fminimum_v16f16(<16 x half> %x, <16 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%r = call <16 x half> @llvm.minimum.v16f16(<16 x half> %x, <16 x half> %y)
ret <16 x half> %r
}

; 256-bit (<16 x half>) llvm.maximum where only "no-signed-zeros-fp-math" is
; set, so NaN inputs are not excluded by the function attributes. The expected
; assembly is vmaxph plus a fixup: vcmpunordph compares %ymm0 with itself to
; build mask %k1 (lanes that are unordered with themselves, i.e. NaN lanes),
; and the masked vmovdqu16 copies those lanes of %ymm0 over the vmaxph result.
; No sign-bit based operand rearrangement is expected before the max.
define <16 x half> @test_fmaximum_v16f16_nans(<16 x half> %x, <16 x half> %y) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_v16f16_nans:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpunordph %ymm0, %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%r = call <16 x half> @llvm.maximum.v16f16(<16 x half> %x, <16 x half> %y)
ret <16 x half> %r
}

; 512-bit (<32 x half>) llvm.minimum where only "no-nans-fp-math" is set, so
; signed zeros are not excluded by the function attributes. The expected
; assembly first derives mask %k1 from %zmm0 with vpmovw2m (one mask bit per
; 16-bit element, taken from its top/sign bit), then uses a masked blend and a
; masked move to rearrange the two inputs per lane before a single vminph.
; No unordered-compare (NaN) fixup is expected after the min.
define <32 x half> @test_fminimum_v32f16_szero(<32 x half> %x, <32 x half> %y) "no-nans-fp-math"="true" {
; CHECK-LABEL: test_fminimum_v32f16_szero:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovw2m %zmm0, %k1
; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vminph %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
%r = call <32 x half> @llvm.minimum.v32f16(<32 x half> %x, <32 x half> %y)
ret <32 x half> %r
}

; 512-bit (<32 x half>) llvm.maximum with no fast-math function attributes, so
; neither NaNs nor signed zeros are excluded. The expected assembly combines
; both fixups seen in the other vector tests of this file:
;   1. vpmovw2m builds mask %k1 from the top/sign bit of each element of
;      %zmm0, and a masked blend plus masked move rearrange the inputs per
;      lane before vmaxph.
;   2. vcmpunordph compares %zmm1 with itself to find lanes that are unordered
;      (NaN), and the masked vmovdqu16 copies those lanes over the vmaxph
;      result.
define <32 x half> @test_fmaximum_v32f16_nans_szero(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: test_fmaximum_v32f16_nans_szero:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovw2m %zmm0, %k1
; CHECK-NEXT: vpblendmw %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmaxph %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vcmpunordph %zmm1, %zmm1, %k1
; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%r = call <32 x half> @llvm.maximum.v32f16(<32 x half> %x, <32 x half> %y)
ret <32 x half> %r
}

0 comments on commit 7226b39

Please sign in to comment.