Skip to content

Commit

Permalink
RISC-V: Refine unsigned avg_floor/avg_ceil
Browse files Browse the repository at this point in the history
This patch is inspired by LLVM patches:
llvm/llvm-project#76550
llvm/llvm-project#77473

Use vaaddu for AVG vectorization.

Before this patch:

        vsetivli        zero,8,e8,mf2,ta,ma
        vle8.v  v3,0(a1)
        vle8.v  v2,0(a2)
        vwaddu.vv        v1,v3,v2
        vsetvli zero,zero,e16,m1,ta,ma
        vadd.vi v1,v1,1
        vsetvli zero,zero,e8,mf2,ta,ma
        vnsrl.wi        v1,v1,1
        vse8.v  v1,0(a0)
        ret

After this patch:

	vsetivli	zero,8,e8,mf2,ta,ma
	csrwi	vxrm,0
	vle8.v	v1,0(a1)
	vle8.v	v2,0(a2)
	vaaddu.vv	v1,v1,v2
	vse8.v	v1,0(a0)
	ret

Note on signed averaging addition

According to the RVV spec, there is also a signed variant of the averaging add, called vaadd.
But AFAIU, whichever rounding mode is selected, vaadd cannot provide the semantics required for signed averaging addition.
Thus this patch only introduces vaaddu.

More details in:
riscvarchive/riscv-v-spec#935
riscvarchive/riscv-v-spec#934

Tested on both RV32 and RV64 with no regressions.

Ok for trunk ?

gcc/ChangeLog:

	* config/riscv/autovec.md (<u>avg<v_double_trunc>3_floor): Remove.
	(avg<v_double_trunc>3_floor): New pattern.
	(<u>avg<v_double_trunc>3_ceil): Remove.
	(avg<v_double_trunc>3_ceil): New pattern.
	(uavg<mode>3_floor): Ditto.
	(uavg<mode>3_ceil): Ditto.
	* config/riscv/riscv-protos.h (enum insn_flags): Add for average addition.
	(enum insn_type): Ditto.
	* config/riscv/riscv-v.cc: Ditto.
	* config/riscv/vector-iterators.md (ashiftrt): Remove.
	(ASHIFTRT): Ditto.
	* config/riscv/vector.md: Add VLS modes.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls/avg-1.c: Adapt test.
	* gcc.target/riscv/rvv/autovec/vls/avg-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-6.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c: Ditto.
  • Loading branch information
zhongjuzhe authored and Liaoshihua committed Mar 13, 2024
1 parent 1281929 commit 2d730f3
Show file tree
Hide file tree
Showing 13 changed files with 86 additions and 44 deletions.
50 changes: 38 additions & 12 deletions gcc/config/riscv/autovec.md
Original file line number Diff line number Diff line change
Expand Up @@ -2345,47 +2345,47 @@
;; op[0] = (narrow) (((wide) op[1] + (wide) op[2] + 1) >> 1);
;; -------------------------------------------------------------------------

(define_expand "<u>avg<v_double_trunc>3_floor"
(define_expand "avg<v_double_trunc>3_floor"
[(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
(truncate:<V_DOUBLE_TRUNC>
(<ext_to_rshift>:VWEXTI
(ashiftrt:VWEXTI
(plus:VWEXTI
(any_extend:VWEXTI
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
(any_extend:VWEXTI
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))]
"TARGET_VECTOR"
{
/* First emit a widening addition. */
rtx tmp1 = gen_reg_rtx (<MODE>mode);
rtx ops1[] = {tmp1, operands[1], operands[2]};
insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode);
insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);

/* Then a narrowing shift. */
rtx ops2[] = {operands[0], tmp1, const1_rtx};
icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode);
icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
DONE;
})

(define_expand "<u>avg<v_double_trunc>3_ceil"
(define_expand "avg<v_double_trunc>3_ceil"
[(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
(truncate:<V_DOUBLE_TRUNC>
(<ext_to_rshift>:VWEXTI
(ashiftrt:VWEXTI
(plus:VWEXTI
(plus:VWEXTI
(any_extend:VWEXTI
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
(any_extend:VWEXTI
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
(const_int 1)))))]
"TARGET_VECTOR"
{
/* First emit a widening addition. */
rtx tmp1 = gen_reg_rtx (<MODE>mode);
rtx ops1[] = {tmp1, operands[1], operands[2]};
insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode);
insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);

/* Then add 1. */
Expand All @@ -2396,11 +2396,37 @@

/* Finally, a narrowing shift. */
rtx ops3[] = {operands[0], tmp2, const1_rtx};
icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode);
icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
DONE;
})

;; Unsigned floor average of operands 1 and 2, stored in operand 0.
;; Expands to a single averaging add with the fixed-point rounding mode
;; set to VXRM_RDN (encoded as 2 in the csrwi below):
;; csrwi vxrm, 2
;; vaaddu.vv vd, vs2, vs1
(define_expand "uavg<mode>3_floor"
[(match_operand:V_VLSI 0 "register_operand")
(match_operand:V_VLSI 1 "register_operand")
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
/* Select the predicated vaaddu pattern for this vector mode.  */
insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
/* BINARY_OP_VXRM_RDN makes the expander append VXRM_RDN as the
   rounding-mode operand of the insn.  */
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands);
DONE;
})

;; Unsigned ceil average of operands 1 and 2, stored in operand 0.
;; Expands to a single averaging add with the fixed-point rounding mode
;; set to VXRM_RNU (encoded as 0 in the csrwi below):
;; csrwi vxrm, 0
;; vaaddu.vv vd, vs2, vs1
(define_expand "uavg<mode>3_ceil"
[(match_operand:V_VLSI 0 "register_operand")
(match_operand:V_VLSI 1 "register_operand")
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
/* Select the predicated vaaddu pattern for this vector mode.  */
insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
/* BINARY_OP_VXRM_RNU makes the expander append VXRM_RNU as the
   rounding-mode operand of the insn.  */
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands);
DONE;
})

;; -------------------------------------------------------------------------
;; ---- [FP] Rounding.
;; -------------------------------------------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions gcc/config/riscv/riscv-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,12 @@ enum insn_flags : unsigned int

/* Means INSN has FRM operand and the value is FRM_RNE. */
FRM_RNE_P = 1 << 19,

/* Means INSN has VXRM operand and the value is VXRM_RNU. */
VXRM_RNU_P = 1 << 20,

/* Means INSN has VXRM operand and the value is VXRM_RDN. */
VXRM_RDN_P = 1 << 21,
};

enum insn_type : unsigned int
Expand Down Expand Up @@ -426,6 +432,8 @@ enum insn_type : unsigned int
BINARY_OP_TAMU = __MASK_OP_TAMU | BINARY_OP_P,
BINARY_OP_TUMA = __MASK_OP_TUMA | BINARY_OP_P,
BINARY_OP_FRM_DYN = BINARY_OP | FRM_DYN_P,
BINARY_OP_VXRM_RNU = BINARY_OP | VXRM_RNU_P,
BINARY_OP_VXRM_RDN = BINARY_OP | VXRM_RDN_P,

/* Ternary operator. Always have real merge operand. */
TERNARY_OP = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P | HAS_MERGE_P
Expand Down
11 changes: 11 additions & 0 deletions gcc/config/riscv/riscv-v.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,13 @@ template <int MAX_OPERANDS> class insn_expander
add_input_operand (frm_rtx, Pmode);
}

void
add_rounding_mode_operand (enum fixed_point_rounding_mode rounding_mode)
{
rtx frm_rtx = gen_int_mode (rounding_mode, Pmode);
add_input_operand (frm_rtx, Pmode);
}

/* Return the vtype mode based on insn_flags.
vtype mode mean the mode vsetvl insn set. */
machine_mode
Expand Down Expand Up @@ -334,6 +341,10 @@ template <int MAX_OPERANDS> class insn_expander
add_rounding_mode_operand (FRM_RMM);
else if (m_insn_flags & FRM_RNE_P)
add_rounding_mode_operand (FRM_RNE);
else if (m_insn_flags & VXRM_RNU_P)
add_rounding_mode_operand (VXRM_RNU);
else if (m_insn_flags & VXRM_RDN_P)
add_rounding_mode_operand (VXRM_RDN);

gcc_assert (insn_data[(int) icode].n_operands == m_opno);
expand (icode, any_mem_p);
Expand Down
5 changes: 0 additions & 5 deletions gcc/config/riscv/vector-iterators.md
Original file line number Diff line number Diff line change
Expand Up @@ -3581,11 +3581,6 @@
(define_code_attr nmsub_nmadd [(plus "nmsub") (minus "nmadd")])
(define_code_attr nmsac_nmacc [(plus "nmsac") (minus "nmacc")])

(define_code_attr ext_to_rshift [(sign_extend "ashiftrt")
(zero_extend "lshiftrt")])
(define_code_attr EXT_TO_RSHIFT [(sign_extend "ASHIFTRT")
(zero_extend "LSHIFTRT")])

(define_code_iterator and_ior [and ior])

(define_code_iterator any_float_binop [plus mult minus div])
Expand Down
12 changes: 6 additions & 6 deletions gcc/config/riscv/vector.md
Original file line number Diff line number Diff line change
Expand Up @@ -4239,8 +4239,8 @@
(set_attr "mode" "<MODE>")])

(define_insn "@pred_<sat_op><mode>"
[(set (match_operand:VI 0 "register_operand" "=vd, vd, vr, vr")
(if_then_else:VI
[(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr")
(if_then_else:V_VLSI
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
Expand All @@ -4251,10 +4251,10 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
(unspec:VI
[(match_operand:VI 3 "register_operand" " vr, vr, vr, vr")
(match_operand:VI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP)
(match_operand:VI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
(unspec:V_VLSI
[(match_operand:V_VLSI 3 "register_operand" " vr, vr, vr, vr")
(match_operand:V_VLSI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP)
(match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"v<sat_op>.vv\t%0,%3,%4%p1"
[(set_attr "type" "<sat_insn_type>")
Expand Down
4 changes: 2 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ DEF_AVG_FLOOR (uint8_t, uint16_t, 1024)
DEF_AVG_FLOOR (uint8_t, uint16_t, 2048)

/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 10 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
Expand Down
4 changes: 2 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ DEF_AVG_FLOOR (uint16_t, uint32_t, 512)
DEF_AVG_FLOOR (uint16_t, uint32_t, 1024)

/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 9 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
Expand Down
4 changes: 2 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ DEF_AVG_FLOOR (uint32_t, uint64_t, 256)
DEF_AVG_FLOOR (uint32_t, uint64_t, 512)

/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 8 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
Expand Down
6 changes: 3 additions & 3 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ DEF_AVG_CEIL (uint8_t, uint16_t, 1024)
DEF_AVG_CEIL (uint8_t, uint16_t, 2048)

/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 10 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 20 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 10 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
Expand Down
6 changes: 3 additions & 3 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 512)
DEF_AVG_CEIL (uint16_t, uint32_t, 1024)

/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 9 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 18 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 9 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
Expand Down
6 changes: 3 additions & 3 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 256)
DEF_AVG_CEIL (uint16_t, uint32_t, 512)

/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 8 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 16 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 8 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
#include "vec-avg-template.h"

/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */
/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */
/* { dg-final { scan-assembler-times {\tvnsrl.wi} 6 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */
/* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
#include "vec-avg-template.h"

/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */
/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */
/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */
/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */
/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */

0 comments on commit 2d730f3

Please sign in to comment.