From e8dc58085b58b4e34a89cae7f667194a82bb5ddb Mon Sep 17 00:00:00 2001
From: Yang Hau
Date: Tue, 30 Jul 2024 23:26:16 +0800
Subject: [PATCH] feat: Add vext[q]_f64

---
 neon2rvv.h     | 10 ++++++--
 tests/impl.cpp | 64 ++++++++++++++++++++++++++++++++++++++++++++++++--
 tests/impl.h   |  4 ++--
 3 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/neon2rvv.h b/neon2rvv.h
index 85866033..8374ab53 100644
--- a/neon2rvv.h
+++ b/neon2rvv.h
@@ -11574,9 +11574,15 @@ FORCE_INLINE float32x4_t vextq_f32(float32x4_t a, float32x4_t b, const int c) {
   return __riscv_vslideup_vx_f32m1(a_slidedown, b, 4 - c, 4);
 }
 
-// FORCE_INLINE float64x1_t vext_f64(float64x1_t a, float64x1_t b, const int n);
+FORCE_INLINE float64x1_t vext_f64(float64x1_t a, float64x1_t b, const int c) {
+  vfloat64m1_t a_slidedown = __riscv_vslidedown_vx_f64m1(a, c, 1);
+  return __riscv_vslideup_vx_f64m1(a_slidedown, b, 1 - c, 1);
+}
 
-// FORCE_INLINE float64x2_t vextq_f64(float64x2_t a, float64x2_t b, const int n);
+FORCE_INLINE float64x2_t vextq_f64(float64x2_t a, float64x2_t b, const int c) {
+  vfloat64m1_t a_slidedown = __riscv_vslidedown_vx_f64m1(a, c, 2);
+  return __riscv_vslideup_vx_f64m1(a_slidedown, b, 2 - c, 2);
+}
 
 // FORCE_INLINE poly8x8_t vext_p8(poly8x8_t a, poly8x8_t b, const int n);
 
diff --git a/tests/impl.cpp b/tests/impl.cpp
index 71472466..26580e9f 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -39230,9 +39230,69 @@ result_t test_vextq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
 #endif  // ENABLE_TEST_ALL
 }
 
-result_t test_vext_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+result_t test_vext_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const double *_a = (const double *)impl.test_cases_float_pointer1;
+  const double *_b = (const double *)impl.test_cases_float_pointer2;
+  const int elt_num = 1;
+  double _c[elt_num];
+  float64x1_t a, b, c;
+
+  double temp_arr[elt_num * 2];
+  for (int i = 0; i < elt_num; i++) {
+    temp_arr[i] = _a[i];
+    temp_arr[i + elt_num] = _b[i];
+  }
+
+#define TEST_IMPL(IDX)                  \
+  for (int i = 0; i < elt_num; i++) {   \
+    _c[i] = temp_arr[i + IDX];          \
+  }                                     \
+  a = vld1_f64(_a);                     \
+  b = vld1_f64(_b);                     \
+  c = vext_f64(a, b, IDX);              \
+  CHECK_RESULT(validate_double(c, _c[0]))
+
+  IMM_1_ITER
+#undef TEST_IMPL
+
+  return TEST_SUCCESS;
+#else
+  return TEST_UNIMPL;
+#endif  // ENABLE_TEST_ALL
+}
 
-result_t test_vextq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+result_t test_vextq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const double *_a = (const double *)impl.test_cases_float_pointer1;
+  const double *_b = (const double *)impl.test_cases_float_pointer2;
+  const int elt_num = 2;
+  double _c[elt_num];
+  float64x2_t a, b, c;
+
+  double temp_arr[elt_num * 2];
+  for (int i = 0; i < elt_num; i++) {
+    temp_arr[i] = _a[i];
+    temp_arr[i + elt_num] = _b[i];
+  }
+
+#define TEST_IMPL(IDX)                  \
+  for (int i = 0; i < elt_num; i++) {   \
+    _c[i] = temp_arr[i + IDX];          \
+  }                                     \
+  a = vld1q_f64(_a);                    \
+  b = vld1q_f64(_b);                    \
+  c = vextq_f64(a, b, IDX);             \
+  CHECK_RESULT(validate_double(c, _c[0], _c[1]))
+
+  IMM_2_ITER
+#undef TEST_IMPL
+
+  return TEST_SUCCESS;
+#else
+  return TEST_UNIMPL;
+#endif  // ENABLE_TEST_ALL
+}
 
 result_t test_vext_p8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
 
diff --git a/tests/impl.h b/tests/impl.h
index 671d2aec..667d8b68 100644
--- a/tests/impl.h
+++ b/tests/impl.h
@@ -2269,8 +2269,8 @@
     _(vextq_s32)          \
     _(vextq_s64)          \
     _(vextq_f32)          \
-    /*_(vext_f64) */      \
-    /*_(vextq_f64) */     \
+    _(vext_f64)           \
+    _(vextq_f64)          \
     _(vextq_u8)           \
     _(vextq_u16)          \
     _(vextq_u32)          \