From f9dd0f0ca85002b4182ebdd5a23d347cae251c2c Mon Sep 17 00:00:00 2001
From: Yang Hau
Date: Wed, 31 Jul 2024 00:21:29 +0800
Subject: [PATCH] feat: Add vrecps[s|d]_[f32|f64]

---
Review notes (this section is ignored by git am):
 * Dropped the hunks that un-commented the eight scalar vqrshl* prototypes
   in neon2rvv.h and enabled their entries in tests/impl.h. They are
   unrelated to vrecps and would leave bodiless FORCE_INLINE declarations
   in the header.
 * vrecpss_f32 now uses 2.0f so the float32_t arithmetic is not promoted
   to double.
 * Indentation and column alignment of the context lines are reconstructed;
   apply with `git apply --ignore-whitespace`. The commit id above and the
   post-image blob ids for neon2rvv.h and tests/impl.h are those of the
   unedited patch.

 neon2rvv.h     |  4 ++--
 tests/impl.cpp | 26 ++++++++++++++++++++++++--
 tests/impl.h   |  4 ++--
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/neon2rvv.h b/neon2rvv.h
index b97ed5ff..d38c6a55 100644
--- a/neon2rvv.h
+++ b/neon2rvv.h
@@ -4796,9 +4796,9 @@ FORCE_INLINE float64x2_t vrecpsq_f64(float64x2_t a, float64x2_t b) {
   return __riscv_vfnmsac_vv_f64m1(vdupq_n_f64(2.0), a, b, 2);
 }
 
-// FORCE_INLINE float32_t vrecpss_f32(float32_t a, float32_t b);
+FORCE_INLINE float32_t vrecpss_f32(float32_t a, float32_t b) { return 2.0f - a * b; }
 
-// FORCE_INLINE float64_t vrecpsd_f64(float64_t a, float64_t b);
+FORCE_INLINE float64_t vrecpsd_f64(float64_t a, float64_t b) { return 2.0 - a * b; }
 
 // FORCE_INLINE float32x2_t vsqrt_f32(float32x2_t a);
 
diff --git a/tests/impl.cpp b/tests/impl.cpp
index e9d15254..466714fe 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -16984,9 +16984,31 @@ result_t test_vrecpsq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
 #endif // ENABLE_TEST_ALL
 }
 
-result_t test_vrecpss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+result_t test_vrecpss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const float *_a = (const float *)impl.test_cases_float_pointer1;
+  const float *_b = (const float *)impl.test_cases_float_pointer2;
+  float _c, c;
+  _c = 2.0 - _a[0] * _b[0];
+  c = vrecpss_f32(_a[0], _b[0]);
+  return validate_float_error(c, _c, 0.001f);
+#else
+  return TEST_UNIMPL;
+#endif // ENABLE_TEST_ALL
+}
 
-result_t test_vrecpsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+result_t test_vrecpsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const double *_a = (const double *)impl.test_cases_float_pointer1;
+  const double *_b = (const double *)impl.test_cases_float_pointer2;
+  double _c, c;
+  _c = 2.0 - _a[0] * _b[0];
+  c = vrecpsd_f64(_a[0], _b[0]);
+  return validate_double_error(c, _c, 0.001f);
+#else
+  return TEST_UNIMPL;
+#endif // ENABLE_TEST_ALL
+}
 
 result_t test_vsqrt_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
 
diff --git a/tests/impl.h b/tests/impl.h
index fe58a6fc..e15cb822 100644
--- a/tests/impl.h
+++ b/tests/impl.h
@@ -959,8 +959,8 @@
     _(vrecpsq_f32)      \
     _(vrecps_f64)       \
     _(vrecpsq_f64)      \
-    /*_(vrecpss_f32) */ \
-    /*_(vrecpsd_f64) */ \
+    _(vrecpss_f32)      \
+    _(vrecpsd_f64)      \
     /*_(vsqrt_f32) */   \
     /*_(vsqrtq_f32) */  \
     /*_(vsqrt_f64) */   \