Skip to content

Commit

Permalink
Merge pull request #486 from howjmay/vrsqrt
Browse files Browse the repository at this point in the history
feat: Add vrsqrts[q|s|d]_[f32|f64]
  • Loading branch information
howjmay authored Jul 30, 2024
2 parents 3dda46b + 42687ec commit bf34d52
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
![coverage badge](https://img.shields.io/badge/coverage-75.4%25-brightgreen)
![coverage badge](https://img.shields.io/badge/coverage-78.3%25-brightgreen)
# neon2rvv

A C/C++ header file that translates Arm/AArch64 NEON intrinsics into RISC-V Vector (RVV) Extension intrinsics.
Expand Down
12 changes: 8 additions & 4 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -4816,13 +4816,17 @@ FORCE_INLINE float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
return __riscv_vfdiv_vf_f32m1(__riscv_vfnmsac_vv_f32m1(vdupq_n_f32(3.0), a, b, 4), 2.0, 4);
}

// FORCE_INLINE float64x1_t vrsqrts_f64(float64x1_t a, float64x1_t b);
// Reciprocal square-root step on a single f64 lane: (3.0 - a * b) / 2.0.
FORCE_INLINE float64x1_t vrsqrts_f64(float64x1_t a, float64x1_t b) {
  // Seed the accumulator with 3.0, then subtract the product a * b from it.
  float64x1_t three = vdup_n_f64(3.0);
  float64x1_t numerator = __riscv_vfnmsac_vv_f64m1(three, a, b, 1);
  // Halve the result; the trailing 1 is the number of lanes processed.
  return __riscv_vfdiv_vf_f64m1(numerator, 2.0, 1);
}

// FORCE_INLINE float64x2_t vrsqrtsq_f64(float64x2_t a, float64x2_t b);
// Reciprocal square-root step on two f64 lanes: (3.0 - a * b) / 2.0 per lane.
FORCE_INLINE float64x2_t vrsqrtsq_f64(float64x2_t a, float64x2_t b) {
  // Seed the accumulator with 3.0, then subtract the product a * b from it.
  float64x2_t three = vdupq_n_f64(3.0);
  float64x2_t numerator = __riscv_vfnmsac_vv_f64m1(three, a, b, 2);
  // Halve the result; the trailing 2 is the number of lanes processed.
  return __riscv_vfdiv_vf_f64m1(numerator, 2.0, 2);
}

// FORCE_INLINE float32_t vrsqrtss_f32(float32_t a, float32_t b);
// Scalar reciprocal square-root step: returns (3.0 - a * b) / 2.0.
//
// Arm's FRSQRTS defines the result for (infinity, zero) operands, in either
// order, as 1.5. Plain arithmetic would produce NaN there, so that case is
// handled explicitly. NaN inputs still propagate through the arithmetic.
FORCE_INLINE float32_t vrsqrtss_f32(float32_t a, float32_t b) {
  const float32_t prod = a * b;
  // A NaN product from two non-NaN operands can only be infinity * zero.
  if (prod != prod && a == a && b == b) {
    return 1.5f;
  }
  // The step is evaluated in double and narrowed once on return.
  return (float32_t)((3.0 - prod) / 2.0);
}

// FORCE_INLINE float64_t vrsqrtsd_f64(float64_t a, float64_t b);
// Scalar reciprocal square-root step: returns (3.0 - a * b) / 2.0.
//
// Arm's FRSQRTS defines the result for (infinity, zero) operands, in either
// order, as 1.5. Plain arithmetic would produce NaN there, so that case is
// handled explicitly. NaN inputs still propagate through the arithmetic.
FORCE_INLINE float64_t vrsqrtsd_f64(float64_t a, float64_t b) {
  const float64_t prod = a * b;
  // A NaN product from two non-NaN operands can only be infinity * zero.
  if (prod != prod && a == a && b == b) {
    return 1.5;
  }
  return (3.0 - prod) / 2.0;
}

FORCE_INLINE int8x8_t vshl_s8(int8x8_t a, int8x8_t b) {
// implementation only works within defined range 'b' in [0, 7]
Expand Down
71 changes: 63 additions & 8 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16998,8 +16998,8 @@ result_t test_vsqrtq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return

result_t test_vrsqrts_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
const float *_a = impl.test_cases_float_pointer1;
const float *_b = impl.test_cases_float_pointer2;
const float *_a = (const float *)impl.test_cases_float_pointer1;
const float *_b = (const float *)impl.test_cases_float_pointer2;
float _c[2];
_c[0] = (3.0 - _a[0] * _b[0]) / 2.0;
_c[1] = (3.0 - _a[1] * _b[1]) / 2.0;
Expand All @@ -17016,8 +17016,8 @@ result_t test_vrsqrts_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {

result_t test_vrsqrtsq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
const float *_a = impl.test_cases_float_pointer1;
const float *_b = impl.test_cases_float_pointer2;
const float *_a = (const float *)impl.test_cases_float_pointer1;
const float *_b = (const float *)impl.test_cases_float_pointer2;
float _c[4];
_c[0] = (3.0 - _a[0] * _b[0]) / 2.0;
_c[1] = (3.0 - _a[1] * _b[1]) / 2.0;
Expand All @@ -17034,13 +17034,68 @@ result_t test_vrsqrtsq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

result_t test_vrsqrts_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Checks vrsqrts_f64 against the scalar reference (3.0 - a * b) / 2.0 on a
// single double lane. Only built when ENABLE_TEST_ALL is defined; otherwise
// the test reports TEST_UNIMPL.
result_t test_vrsqrts_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
// NOTE(review): the float test-case buffers are viewed as double here --
// confirm the buffers are large and aligned enough for double loads.
const double *_a = (const double *)impl.test_cases_float_pointer1;
const double *_b = (const double *)impl.test_cases_float_pointer2;
// Expected value, computed with ordinary double arithmetic.
double _c[1];
_c[0] = (3.0 - _a[0] * _b[0]) / 2.0;

result_t test_vrsqrtsq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Load one double from each buffer and run the intrinsic under test.
float64x1_t a = vld1_f64(_a);
float64x1_t b = vld1_f64(_b);
float64x1_t c = vrsqrts_f64(a, b);

result_t test_vrsqrtss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// The last argument is presumably the permitted error -- confirm against
// validate_double_error.
return validate_double_error(c, _c[0], 0.0001f);
#else
return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

// Checks vrsqrtsq_f64 against the scalar reference (3.0 - a * b) / 2.0 on two
// double lanes. Only built when ENABLE_TEST_ALL is defined; otherwise the
// test reports TEST_UNIMPL.
result_t test_vrsqrtsq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
// NOTE(review): the float test-case buffers are viewed as double here --
// confirm the buffers are large and aligned enough for two double loads.
const double *_a = (const double *)impl.test_cases_float_pointer1;
const double *_b = (const double *)impl.test_cases_float_pointer2;
// Expected values, computed lane by lane with ordinary double arithmetic.
double _c[2];
_c[0] = (3.0 - _a[0] * _b[0]) / 2.0;
_c[1] = (3.0 - _a[1] * _b[1]) / 2.0;

result_t test_vrsqrtsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Load two doubles from each buffer and run the intrinsic under test.
float64x2_t a = vld1q_f64(_a);
float64x2_t b = vld1q_f64(_b);
float64x2_t c = vrsqrtsq_f64(a, b);

// The last argument is presumably the permitted error -- confirm against
// validate_double_error.
return validate_double_error(c, _c[0], _c[1], 0.0001f);
#else
return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

// Compares vrsqrtss_f32 with the scalar reference (3.0 - a * b) / 2.0 using
// the first float of each test-case buffer. Reports TEST_UNIMPL when
// ENABLE_TEST_ALL is not defined.
result_t test_vrsqrtss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const float *lhs = (const float *)impl.test_cases_float_pointer1;
  const float *rhs = (const float *)impl.test_cases_float_pointer2;

  // Reference value: evaluated with double constants, then stored as float.
  float expected = (3.0 - lhs[0] * rhs[0]) / 2.0;
  float actual = vrsqrtss_f32(lhs[0], rhs[0]);

  return validate_double_error(actual, expected, 0.0001f);
#else
  return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

// Compares vrsqrtsd_f64 with the scalar reference (3.0 - a * b) / 2.0 using
// the first double read from each test-case buffer. Reports TEST_UNIMPL when
// ENABLE_TEST_ALL is not defined.
result_t test_vrsqrtsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const double *lhs = (const double *)impl.test_cases_float_pointer1;
  const double *rhs = (const double *)impl.test_cases_float_pointer2;

  // Reference value from ordinary double arithmetic.
  double expected = (3.0 - lhs[0] * rhs[0]) / 2.0;
  double actual = vrsqrtsd_f64(lhs[0], rhs[0]);

  return validate_double_error(actual, expected, 0.0001f);
#else
  return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

result_t test_vshl_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
Expand Down
8 changes: 4 additions & 4 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -967,10 +967,10 @@
/*_(vsqrtq_f64) */ \
_(vrsqrts_f32) \
_(vrsqrtsq_f32) \
/*_(vrsqrts_f64) */ \
/*_(vrsqrtsq_f64) */ \
/*_(vrsqrtss_f32) */ \
/*_(vrsqrtsd_f64) */ \
_(vrsqrts_f64) \
_(vrsqrtsq_f64) \
_(vrsqrtss_f32) \
_(vrsqrtsd_f64) \
_(vshl_s8) \
_(vshl_s16) \
_(vshl_s32) \
Expand Down

0 comments on commit bf34d52

Please sign in to comment.