Skip to content

Commit

Permalink
Merge pull request #490 from howjmay/vst1_lane
Browse files Browse the repository at this point in the history
feat: vst1[q]_lane_f64
  • Loading branch information
howjmay authored Jul 30, 2024
2 parents 806191d + 7d5ed1e commit ad630ec
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 32 deletions.
62 changes: 34 additions & 28 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -13332,68 +13332,68 @@ FORCE_INLINE void vst1q_u64(uint64_t *a, uint64x2_t b) { __riscv_vse64_v_u64m1(a

// FORCE_INLINE void vst1q_f16(float16_t * ptr, float16x8_t val);

FORCE_INLINE void vst1_lane_s8(int8_t *a, int8x8_t b, const int c) {
vint8m1_t b_s = __riscv_vslidedown_vx_i8m1(b, c, 8);
// Store one lane of `b` to `*a`: slide the vector down by `lane` so the requested element lands in element 0,
// then move that element out as a scalar. `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_s8(int8_t *a, int8x8_t b, const int lane) {
  *a = __riscv_vmv_x_s_i8m1_i8(__riscv_vslidedown_vx_i8m1(b, lane, 8));
}

FORCE_INLINE void vst1_lane_s16(int16_t *a, int16x4_t b, const int c) {
vint16m1_t b_s = __riscv_vslidedown_vx_i16m1(b, c, 4);
// Store one lane of `b` to `*a`. The slide-down by `lane` brings the requested element to position 0, where it is
// read out as a scalar. `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_s16(int16_t *a, int16x4_t b, const int lane) {
  const vint16m1_t lane_first = __riscv_vslidedown_vx_i16m1(b, lane, 4);
  a[0] = __riscv_vmv_x_s_i16m1_i16(lane_first);
}

FORCE_INLINE void vst1_lane_s32(int32_t *a, int32x2_t b, const int c) {
vint32m1_t b_s = __riscv_vslidedown_vx_i32m1(b, c, 2);
// Store one lane of `b` to `*a`: slide down by `lane`, then extract element 0 as a scalar.
// `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_s32(int32_t *a, int32x2_t b, const int lane) {
  *a = __riscv_vmv_x_s_i32m1_i32(__riscv_vslidedown_vx_i32m1(b, lane, 2));
}

FORCE_INLINE void vst1_lane_f32(float32_t *a, float32x2_t b, const int c) {
vfloat32m1_t b_s = __riscv_vslidedown_vx_f32m1(b, c, 2);
// Store one lane of `b` to `*a`. The requested element is slid down to position 0 and then moved to a scalar
// float with the floating-point move intrinsic. `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_f32(float32_t *a, float32x2_t b, const int lane) {
  const vfloat32m1_t lane_first = __riscv_vslidedown_vx_f32m1(b, lane, 2);
  a[0] = __riscv_vfmv_f_s_f32m1_f32(lane_first);
}

FORCE_INLINE void vst1_lane_u8(uint8_t *a, uint8x8_t b, const int c) {
vuint8m1_t b_s = __riscv_vslidedown_vx_u8m1(b, c, 8);
// Store one lane of `b` to `*a`: slide down by `lane`, then extract element 0 as a scalar.
// `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_u8(uint8_t *a, uint8x8_t b, const int lane) {
  *a = __riscv_vmv_x_s_u8m1_u8(__riscv_vslidedown_vx_u8m1(b, lane, 8));
}

FORCE_INLINE void vst1_lane_u16(uint16_t *a, uint16x4_t b, const int c) {
vuint16m1_t b_s = __riscv_vslidedown_vx_u16m1(b, c, 4);
// Store one lane of `b` to `*a`. The slide-down by `lane` brings the requested element to position 0, where it is
// read out as a scalar. `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_u16(uint16_t *a, uint16x4_t b, const int lane) {
  const vuint16m1_t lane_first = __riscv_vslidedown_vx_u16m1(b, lane, 4);
  a[0] = __riscv_vmv_x_s_u16m1_u16(lane_first);
}

FORCE_INLINE void vst1_lane_u32(uint32_t *a, uint32x2_t b, const int c) {
vuint32m1_t b_s = __riscv_vslidedown_vx_u32m1(b, c, 2);
// Store one lane of `b` to `*a`: slide down by `lane`, then extract element 0 as a scalar.
// `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_u32(uint32_t *a, uint32x2_t b, const int lane) {
  *a = __riscv_vmv_x_s_u32m1_u32(__riscv_vslidedown_vx_u32m1(b, lane, 2));
}

FORCE_INLINE void vst1_lane_s64(int64_t *a, int64x1_t b, const int c) {
vint64m1_t b_s = __riscv_vslidedown_vx_i64m1(b, c, 1);
// Store one lane of the single-element vector `b` to `*a`. The vector is slid down by `lane` (vl = 1) and
// element 0 of the result is moved to a scalar. `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_s64(int64_t *a, int64x1_t b, const int lane) {
  const vint64m1_t lane_first = __riscv_vslidedown_vx_i64m1(b, lane, 1);
  a[0] = __riscv_vmv_x_s_i64m1_i64(lane_first);
}

FORCE_INLINE void vst1_lane_u64(uint64_t *a, uint64x1_t b, const int c) {
vuint64m1_t b_s = __riscv_vslidedown_vx_u64m1(b, c, 1);
// Store one lane of the single-element vector `b` to `*a`: slide down by `lane` (vl = 1), then extract
// element 0 as a scalar. `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_u64(uint64_t *a, uint64x1_t b, const int lane) {
  *a = __riscv_vmv_x_s_u64m1_u64(__riscv_vslidedown_vx_u64m1(b, lane, 1));
}

FORCE_INLINE void vst1q_lane_s8(int8_t *a, int8x16_t b, const int c) {
int8x8_t b_s = __riscv_vslidedown_vx_i8m1(b, c, 8);
// Store one lane of the 16 x int8 vector `b` to `*a`.
// The slide-down by `lane` brings the requested element to position 0, which is then moved out as a scalar.
// The temporary uses the native RVV type returned by the intrinsic and vl covers all 16 lanes of the q-register,
// in line with vst1q_lane_s32 / vst1q_lane_f32, which pass their full lane count. Only element 0 is consumed, so
// the stored value is the same as before. `lane` is not range-checked here.
FORCE_INLINE void vst1q_lane_s8(int8_t *a, int8x16_t b, const int lane) {
  vint8m1_t b_s = __riscv_vslidedown_vx_i8m1(b, lane, 16);
  *a = __riscv_vmv_x_s_i8m1_i8(b_s);
}

FORCE_INLINE void vst1q_lane_s16(int16_t *a, int16x8_t b, const int c) {
vint16m1_t b_s = __riscv_vslidedown_vx_i16m1(b, c, 4);
// Store one lane of the 8 x int16 vector `b` to `*a`.
// The slide-down by `lane` brings the requested element to position 0, which is then moved out as a scalar.
// vl covers all 8 lanes of the q-register, in line with vst1q_lane_s32 / vst1q_lane_f32, which pass their full
// lane count. Only element 0 is consumed, so the stored value is the same as before.
// `lane` is not range-checked here.
FORCE_INLINE void vst1q_lane_s16(int16_t *a, int16x8_t b, const int lane) {
  vint16m1_t b_s = __riscv_vslidedown_vx_i16m1(b, lane, 8);
  *a = __riscv_vmv_x_s_i16m1_i16(b_s);
}

FORCE_INLINE void vst1q_lane_s32(int32_t *a, int32x4_t b, const int c) {
vint32m1_t b_s = __riscv_vslidedown_vx_i32m1(b, c, 4);
// Store one lane of the 4 x int32 vector `b` to `*a`: slide down by `lane`, then extract element 0 as a scalar.
// `lane` is not range-checked here.
FORCE_INLINE void vst1q_lane_s32(int32_t *a, int32x4_t b, const int lane) {
  *a = __riscv_vmv_x_s_i32m1_i32(__riscv_vslidedown_vx_i32m1(b, lane, 4));
}

FORCE_INLINE void vst1q_lane_f32(float32_t *a, float32x4_t b, const int c) {
vfloat32m1_t b_s = __riscv_vslidedown_vx_f32m1(b, c, 4);
// Store one lane of the 4 x float32 vector `b` to `*a`. The requested element is slid down to position 0 and
// then moved to a scalar float. `lane` is not range-checked here.
FORCE_INLINE void vst1q_lane_f32(float32_t *a, float32x4_t b, const int lane) {
  const vfloat32m1_t lane_first = __riscv_vslidedown_vx_f32m1(b, lane, 4);
  a[0] = __riscv_vfmv_f_s_f32m1_f32(lane_first);
}

Expand All @@ -13405,9 +13405,15 @@ FORCE_INLINE void vst1q_lane_f32(float32_t *a, float32x4_t b, const int c) {

// FORCE_INLINE void vst1q_lane_p16(poly16_t * ptr, poly16x8_t val, const int lane);

// FORCE_INLINE void vst1_lane_f64(float64_t * ptr, float64x1_t val, const int lane);
// Store one lane of the single-element float64 vector `b` to `*a`: slide down by `lane` (vl = 1), then move
// element 0 out as a scalar double. `lane` is not range-checked here.
FORCE_INLINE void vst1_lane_f64(float64_t *a, float64x1_t b, const int lane) {
  *a = __riscv_vfmv_f_s_f64m1_f64(__riscv_vslidedown_vx_f64m1(b, lane, 1));
}

// FORCE_INLINE void vst1q_lane_f64(float64_t * ptr, float64x2_t val, const int lane);
// Store one lane of the 2 x float64 vector `b` to `*a`. The requested element is slid down to position 0 and
// then moved to a scalar double. `lane` is not range-checked here.
FORCE_INLINE void vst1q_lane_f64(float64_t *a, float64x2_t b, const int lane) {
  const vfloat64m1_t lane_first = __riscv_vslidedown_vx_f64m1(b, lane, 2);
  a[0] = __riscv_vfmv_f_s_f64m1_f64(lane_first);
}

FORCE_INLINE void vst1q_lane_u8(uint8_t *a, uint8x16_t b, const int c) {
vuint8m1_t b_s = __riscv_vslidedown_vx_u8m1(b, c, 8);
Expand Down
44 changes: 42 additions & 2 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44327,9 +44327,49 @@ result_t test_vst1_lane_p16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { ret

// Stub: no test body is provided for vst1q_lane_p16, so the test unconditionally reports TEST_UNIMPL.
result_t test_vst1q_lane_p16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}

result_t test_vst1_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Test for vst1_lane_f64: loads a vector from the test data, stores one lane of it into a scratch buffer, reloads
// the buffer and checks it against the source element for that lane.
// Returns TEST_SUCCESS when the checks pass; returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vst1_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
// Scratch destination written by vst1_lane_f64.
double _a[1];
// NOTE(review): the float test-case buffer is reinterpreted as doubles here — confirm this is intended.
const double *_b = (const double *)impl.test_cases_float_pointer1;
float64x1_t b;
float64x1_t a;

// One test step for lane index IDX: load, store the lane, reload, compare with _b[IDX].
#define TEST_IMPL(IDX) \
b = vld1_f64(_b); \
vst1_lane_f64(_a, b, IDX); \
a = vld1_f64(_a); \
CHECK_RESULT(validate_double(a, _b[IDX]))

// IMM_1_ITER is defined elsewhere; presumably it expands TEST_IMPL for each valid lane index — TODO confirm.
IMM_1_ITER
#undef TEST_IMPL

return TEST_SUCCESS;
#else
return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

result_t test_vst1q_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Test for vst1q_lane_f64: loads a 2-lane vector from the test data, stores one lane of it into a scratch buffer,
// reloads the buffer and checks element 0 against the source element for that lane.
// Returns TEST_SUCCESS when the checks pass; returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vst1q_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  // vst1q_lane_f64 writes only _a[0], but _a[1] is read back by vld1q_f64 and used as the expected second
  // element below. Zero-initialize the buffer so that read is well-defined and the test is deterministic.
  double _a[2] = {0.0, 0.0};
  // NOTE(review): the float test-case buffer is reinterpreted as doubles here — confirm this is intended.
  const double *_b = (const double *)impl.test_cases_float_pointer1;
  float64x2_t b;
  float64x2_t a;

// One test step for lane index IDX: load, store the lane, reload, compare with (_b[IDX], untouched _a[1]).
#define TEST_IMPL(IDX)        \
  b = vld1q_f64(_b);          \
  vst1q_lane_f64(_a, b, IDX); \
  a = vld1q_f64(_a);          \
  CHECK_RESULT(validate_double(a, _b[IDX], _a[1]))

  // IMM_2_ITER is defined elsewhere; presumably it expands TEST_IMPL for each valid lane index — TODO confirm.
  IMM_2_ITER
#undef TEST_IMPL

  return TEST_SUCCESS;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

result_t test_vst1q_lane_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
Expand Down
4 changes: 2 additions & 2 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2718,8 +2718,8 @@
_(vst1q_lane_u32) \
/*_(vst1q_lane_p8) */ \
/*_(vst1q_lane_p16) */ \
/*_(vst1_lane_f64) */ \
/*_(vst1q_lane_f64) */ \
_(vst1_lane_f64) \
_(vst1q_lane_f64) \
/*_(vst1q_lane_p64) */ \
_(vst1q_lane_s64) \
_(vst1q_lane_u64) \
Expand Down

0 comments on commit ad630ec

Please sign in to comment.