Skip to content

Commit

Permalink
Merge pull request #487 from howjmay/vext
Browse files Browse the repository at this point in the history
feat: Add vext[q]_f64
  • Loading branch information
howjmay authored Jul 30, 2024
2 parents bf34d52 + e8dc580 commit 2887202
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 6 deletions.
10 changes: 8 additions & 2 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -11574,9 +11574,15 @@ FORCE_INLINE float32x4_t vextq_f32(float32x4_t a, float32x4_t b, const int c) {
return __riscv_vslideup_vx_f32m1(a_slidedown, b, 4 - c, 4);
}

// FORCE_INLINE float64x1_t vext_f64(float64x1_t a, float64x1_t b, const int n);
// Extracts a float64x1_t from the pair (a, b): lanes of a starting at index c,
// followed by lanes of b, using an RVV slide-down / slide-up pair.
FORCE_INLINE float64x1_t vext_f64(float64x1_t a, float64x1_t b, const int c) {
  const int lanes = 1;  // a float64x1_t carries a single f64 lane
  // Drop the lowest c lanes of a so that lane c lands at position 0.
  vfloat64m1_t shifted = __riscv_vslidedown_vx_f64m1(a, c, lanes);
  // Slide b up by (lanes - c) on top of the shifted copy of a.
  vfloat64m1_t merged = __riscv_vslideup_vx_f64m1(shifted, b, lanes - c, lanes);
  return merged;
}

// FORCE_INLINE float64x2_t vextq_f64(float64x2_t a, float64x2_t b, const int n);
// Extracts a float64x2_t from the pair (a, b): lanes of a starting at index c,
// followed by lanes of b, using an RVV slide-down / slide-up pair.
FORCE_INLINE float64x2_t vextq_f64(float64x2_t a, float64x2_t b, const int c) {
  const int lanes = 2;  // a float64x2_t carries two f64 lanes
  // Drop the lowest c lanes of a so that lane c lands at position 0.
  vfloat64m1_t shifted = __riscv_vslidedown_vx_f64m1(a, c, lanes);
  // Slide b up by (lanes - c) on top of the shifted copy of a.
  vfloat64m1_t merged = __riscv_vslideup_vx_f64m1(shifted, b, lanes - c, lanes);
  return merged;
}

// FORCE_INLINE poly8x8_t vext_p8(poly8x8_t a, poly8x8_t b, const int n);

Expand Down
64 changes: 62 additions & 2 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39230,9 +39230,69 @@ result_t test_vextq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

// Earlier placeholder for test_vext_f64: unconditionally reports TEST_UNIMPL.
// NOTE(review): a full definition with the same name follows in this diff.
result_t test_vext_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Tests vext_f64 against a scalar reference.
//
// The reference is built by concatenating the lanes of a and b into temp_arr
// and reading elt_num values starting at the immediate index IDX.
// Returns TEST_SUCCESS when every checked index matches, or TEST_UNIMPL when
// ENABLE_TEST_ALL is not defined.
result_t test_vext_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const double *_a = (const double *)impl.test_cases_float_pointer1;
  const double *_b = (const double *)impl.test_cases_float_pointer2;
  const int elt_num = 1;
  double _c[elt_num];
  float64x1_t a, b, c;

  // Keep the reference buffer in double: storing the f64 inputs in a float
  // array would narrow them and make the expected values differ from the
  // full-precision result produced by vext_f64.
  double temp_arr[elt_num * 2];
  for (int i = 0; i < elt_num; i++) {
    temp_arr[i] = _a[i];
    temp_arr[i + elt_num] = _b[i];
  }

// One check for a given immediate IDX.
// NOTE(review): IMM_1_ITER presumably expands TEST_IMPL for each valid index.
#define TEST_IMPL(IDX)                 \
  for (int i = 0; i < elt_num; i++) {  \
    _c[i] = temp_arr[i + IDX];         \
  }                                    \
  a = vld1_f64(_a);                    \
  b = vld1_f64(_b);                    \
  c = vext_f64(a, b, IDX);             \
  CHECK_RESULT(validate_double(c, _c[0]))

  IMM_1_ITER
#undef TEST_IMPL

  return TEST_SUCCESS;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Earlier placeholder for test_vextq_f64: unconditionally reports TEST_UNIMPL.
// NOTE(review): a full definition with the same name follows in this diff.
result_t test_vextq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Tests vextq_f64 against a scalar reference.
//
// The reference is built by concatenating the lanes of a and b into temp_arr
// and reading elt_num values starting at the immediate index IDX.
// Returns TEST_SUCCESS when every checked index matches, or TEST_UNIMPL when
// ENABLE_TEST_ALL is not defined.
result_t test_vextq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const double *_a = (const double *)impl.test_cases_float_pointer1;
  const double *_b = (const double *)impl.test_cases_float_pointer2;
  const int elt_num = 2;
  double _c[elt_num];
  float64x2_t a, b, c;

  // Keep the reference buffer in double: storing the f64 inputs in a float
  // array would narrow them and make the expected values differ from the
  // full-precision result produced by vextq_f64.
  double temp_arr[elt_num * 2];
  for (int i = 0; i < elt_num; i++) {
    temp_arr[i] = _a[i];
    temp_arr[i + elt_num] = _b[i];
  }

// One check for a given immediate IDX.
// NOTE(review): IMM_2_ITER presumably expands TEST_IMPL for each valid index.
#define TEST_IMPL(IDX)                 \
  for (int i = 0; i < elt_num; i++) {  \
    _c[i] = temp_arr[i + IDX];         \
  }                                    \
  a = vld1q_f64(_a);                   \
  b = vld1q_f64(_b);                   \
  c = vextq_f64(a, b, IDX);            \
  CHECK_RESULT(validate_double(c, _c[0], _c[1]))

  IMM_2_ITER
#undef TEST_IMPL

  return TEST_SUCCESS;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Placeholder: no test body yet, so this unconditionally reports TEST_UNIMPL.
result_t test_vext_p8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }

Expand Down
4 changes: 2 additions & 2 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2269,8 +2269,8 @@
_(vextq_s32) \
_(vextq_s64) \
_(vextq_f32) \
/*_(vext_f64) */ \
/*_(vextq_f64) */ \
_(vext_f64) \
_(vextq_f64) \
_(vextq_u8) \
_(vextq_u16) \
_(vextq_u32) \
Expand Down

0 comments on commit 2887202

Please sign in to comment.