Skip to content

Commit

Permalink
feat: Add vrecps[s|d]_[f32|f64]
Browse files Browse the repository at this point in the history
  • Loading branch information
howjmay committed Jul 30, 2024
1 parent 1d79156 commit f9dd0f0
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 22 deletions.
20 changes: 10 additions & 10 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -4796,9 +4796,9 @@ FORCE_INLINE float64x2_t vrecpsq_f64(float64x2_t a, float64x2_t b) {
return __riscv_vfnmsac_vv_f64m1(vdupq_n_f64(2.0), a, b, 2);
}

// FORCE_INLINE float32_t vrecpss_f32(float32_t a, float32_t b);
// Scalar reciprocal-step helper: returns 2.0 - a * b for a single pair of
// operands. The vector variants in this file compute the same expression
// lane-wise.
// NOTE(review): Arm documents FRECPS as a fused operation that yields 2.0
// when one operand is zero and the other is infinite; presumably this plain
// expression yields NaN in that case - confirm whether callers care.
FORCE_INLINE float32_t vrecpss_f32(float32_t a, float32_t b) {
  return 2.0 - (a * b);
}

// FORCE_INLINE float64_t vrecpsd_f64(float64_t a, float64_t b);
// Scalar reciprocal-step helper (f64 flavour): returns 2.0 - a * b for a
// single pair of operands.
// NOTE(review): Arm documents FRECPS as a fused operation that yields 2.0
// when one operand is zero and the other is infinite; presumably this plain
// expression yields NaN in that case - confirm whether callers care.
FORCE_INLINE float64_t vrecpsd_f64(float64_t a, float64_t b) {
  return 2.0 - (a * b);
}

// FORCE_INLINE float32x2_t vsqrt_f32(float32x2_t a);

Expand Down Expand Up @@ -5650,21 +5650,21 @@ FORCE_INLINE uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b) {

// FORCE_INLINE uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b);

// FORCE_INLINE int8_t vqrshlb_s8(int8_t a, int8_t b);
FORCE_INLINE int8_t vqrshlb_s8(int8_t a, int8_t b);

// FORCE_INLINE int16_t vqrshlh_s16(int16_t a, int16_t b);
FORCE_INLINE int16_t vqrshlh_s16(int16_t a, int16_t b);

// FORCE_INLINE int32_t vqrshls_s32(int32_t a, int32_t b);
FORCE_INLINE int32_t vqrshls_s32(int32_t a, int32_t b);

// FORCE_INLINE int64_t vqrshld_s64(int64_t a, int64_t b);
FORCE_INLINE int64_t vqrshld_s64(int64_t a, int64_t b);

// FORCE_INLINE uint8_t vqrshlb_u8(uint8_t a, int8_t b);
FORCE_INLINE uint8_t vqrshlb_u8(uint8_t a, int8_t b);

// FORCE_INLINE uint16_t vqrshlh_u16(uint16_t a, int16_t b);
FORCE_INLINE uint16_t vqrshlh_u16(uint16_t a, int16_t b);

// FORCE_INLINE uint32_t vqrshls_u32(uint32_t a, int32_t b);
FORCE_INLINE uint32_t vqrshls_u32(uint32_t a, int32_t b);

// FORCE_INLINE uint64_t vqrshld_u64(uint64_t a, int64_t b);
FORCE_INLINE uint64_t vqrshld_u64(uint64_t a, int64_t b);

FORCE_INLINE int8x8_t vshr_n_s8(int8x8_t a, const int b) {
const int imm = b - (b >> 3);
Expand Down
26 changes: 24 additions & 2 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16984,9 +16984,31 @@ result_t test_vrecpsq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

result_t test_vrecpss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Checks vrecpss_f32 against the reference expression 2.0 - a * b, using
// only the first element of each of the two float test-case buffers.
// Returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vrecpss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const float *lhs = (const float *)impl.test_cases_float_pointer1;
  const float *rhs = (const float *)impl.test_cases_float_pointer2;

  // Reference value first, then the intrinsic under test.
  const float expected = 2.0 - lhs[0] * rhs[0];
  const float actual = vrecpss_f32(lhs[0], rhs[0]);

  return validate_float_error(actual, expected, 0.001f);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

result_t test_vrecpsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Checks vrecpsd_f64 against the reference expression 2.0 - a * b, using
// only the first double read from each of the two test-case buffers.
// Returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vrecpsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  // NOTE(review): the "float" test-case buffers are reinterpreted as double
  // here; confirm the buffers are sized and aligned for this access.
  const double *lhs = (const double *)impl.test_cases_float_pointer1;
  const double *rhs = (const double *)impl.test_cases_float_pointer2;

  // Reference value first, then the intrinsic under test.
  const double expected = 2.0 - lhs[0] * rhs[0];
  const double actual = vrecpsd_f64(lhs[0], rhs[0]);

  return validate_double_error(actual, expected, 0.001f);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Placeholder: no test body exists for vsqrt_f32 yet, so this always
// reports TEST_UNIMPL.
result_t test_vsqrt_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}

Expand Down
20 changes: 10 additions & 10 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -959,8 +959,8 @@
_(vrecpsq_f32) \
_(vrecps_f64) \
_(vrecpsq_f64) \
/*_(vrecpss_f32) */ \
/*_(vrecpsd_f64) */ \
_(vrecpss_f32) \
_(vrecpsd_f64) \
/*_(vsqrt_f32) */ \
/*_(vsqrtq_f32) */ \
/*_(vsqrt_f64) */ \
Expand Down Expand Up @@ -1047,14 +1047,14 @@
_(vqrshlq_u16) \
_(vqrshlq_u32) \
_(vqrshlq_u64) \
/*_(vqrshlb_s8) */ \
/*_(vqrshlh_s16) */ \
/*_(vqrshls_s32) */ \
/*_(vqrshld_s64) */ \
/*_(vqrshlb_u8) */ \
/*_(vqrshlh_u16) */ \
/*_(vqrshls_u32) */ \
/*_(vqrshld_u64) */ \
_(vqrshlb_s8) \
_(vqrshlh_s16) \
_(vqrshls_s32) \
_(vqrshld_s64) \
_(vqrshlb_u8) \
_(vqrshlh_u16) \
_(vqrshls_u32) \
_(vqrshld_u64) \
_(vshr_n_s8) \
_(vshr_n_s16) \
_(vshr_n_s32) \
Expand Down

0 comments on commit f9dd0f0

Please sign in to comment.