diff --git a/neon2rvv.h b/neon2rvv.h
index e4241800..9bd6570b 100644
--- a/neon2rvv.h
+++ b/neon2rvv.h
@@ -2783,7 +2783,15 @@ FORCE_INLINE int8x8x2_t vld2_s8(const int8_t *__a) { return __riscv_vlseg2e8_v_i
 
 // FORCE_INLINE uint32x4x2_t vld2q_u32(const uint32_t *__a);
 
-// FORCE_INLINE int8x8x2_t vld2_lane_s8(const int8_t *__a, int8x8x2_t __b, const int __c);
+FORCE_INLINE int8x8x2_t vld2_lane_s8(const int8_t *__a, int8x8x2_t __b, const int __c) {
+  const uint8_t mask_arr[] = {(uint8_t)(1 << __c)};
+  vbool8_t mask = __riscv_vlm_v_b8(mask_arr, 1);
+  vint8m1_t a_dup0 = vdup_n_s8(__a[0]);
+  vint8m1_t a_dup1 = vdup_n_s8(__a[1]);
+  vint8m1_t c0 = __riscv_vmerge_vvm_i8m1(__riscv_vget_v_i8m1x2_i8m1(__b, 0), a_dup0, mask, 8);
+  vint8m1_t c1 = __riscv_vmerge_vvm_i8m1(__riscv_vget_v_i8m1x2_i8m1(__b, 1), a_dup1, mask, 8);
+  return __riscv_vcreate_v_i8m1x2(c0, c1);
+}
 
 // FORCE_INLINE int16x4x2_t vld2_lane_s16(const int16_t *__a, int16x4x2_t __b, const int __c);
 
diff --git a/tests/impl.cpp b/tests/impl.cpp
index 8434d750..736bcb41 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -3666,7 +3666,54 @@ result_t test_vld2q_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return
 
 result_t test_vld2q_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
 
-result_t test_vld2_lane_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+result_t test_vld2_lane_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+  const int8_t *_a = (int8_t *)impl.test_cases_int_pointer1;
+  const int8_t *_b = (int8_t *)impl.test_cases_int_pointer2;
+  int8x8x2_t c;
+  int8x8x2_t b = vld2_s8(_b);
+  c = vld2_lane_s8(_a, b, 0);
+  if (TEST_FAIL == validate_int8(c, _a[0], _b[2], _b[4], _b[6], _b[8], _b[10], _b[12], _b[14], _a[1], _b[3], _b[5],
+                                 _b[7], _b[9], _b[11], _b[13], _b[15])) {
+    return TEST_FAIL;
+  }
+  c = vld2_lane_s8(_a, b, 1);
+  if (TEST_FAIL == validate_int8(c, _b[0], _a[0], _b[4], _b[6], _b[8], _b[10], _b[12], _b[14], _b[1], _a[1], _b[5],
+                                 _b[7], _b[9], _b[11], _b[13], _b[15])) {
+    return TEST_FAIL;
+  }
+  c = vld2_lane_s8(_a, b, 2);
+  if (TEST_FAIL == validate_int8(c, _b[0], _b[2], _a[0], _b[6], _b[8], _b[10], _b[12], _b[14], _b[1], _b[3], _a[1],
+                                 _b[7], _b[9], _b[11], _b[13], _b[15])) {
+    return TEST_FAIL;
+  }
+  c = vld2_lane_s8(_a, b, 3);
+  if (TEST_FAIL == validate_int8(c, _b[0], _b[2], _b[4], _a[0], _b[8], _b[10], _b[12], _b[14], _b[1], _b[3], _b[5],
+                                 _a[1], _b[9], _b[11], _b[13], _b[15])) {
+    return TEST_FAIL;
+  }
+  c = vld2_lane_s8(_a, b, 4);
+  if (TEST_FAIL == validate_int8(c, _b[0], _b[2], _b[4], _b[6], _a[0], _b[10], _b[12], _b[14], _b[1], _b[3], _b[5],
+                                 _b[7], _a[1], _b[11], _b[13], _b[15])) {
+    return TEST_FAIL;
+  }
+  c = vld2_lane_s8(_a, b, 5);
+  if (TEST_FAIL == validate_int8(c, _b[0], _b[2], _b[4], _b[6], _b[8], _a[0], _b[12], _b[14], _b[1], _b[3], _b[5],
+                                 _b[7], _b[9], _a[1], _b[13], _b[15])) {
+    return TEST_FAIL;
+  }
+  c = vld2_lane_s8(_a, b, 6);
+  if (TEST_FAIL == validate_int8(c, _b[0], _b[2], _b[4], _b[6], _b[8], _b[10], _a[0], _b[14], _b[1], _b[3], _b[5],
+                                 _b[7], _b[9], _b[11], _a[1], _b[15])) {
+    return TEST_FAIL;
+  }
+  c = vld2_lane_s8(_a, b, 7);
+  if (TEST_FAIL == validate_int8(c, _b[0], _b[2], _b[4], _b[6], _b[8], _b[10], _b[12], _a[0], _b[1], _b[3], _b[5],
+                                 _b[7], _b[9], _b[11], _b[13], _a[1])) {
+    return TEST_FAIL;
+  }
+
+  return TEST_SUCCESS;
+}
 
 result_t test_vld2_lane_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
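
For reference, a minimal scalar sketch of the semantics the new test asserts, assuming the usual de-interleaving behaviour of vld2_s8 on the `_b` buffer; the struct and function names below are hypothetical illustrations and are not part of the patch:

#include <stdint.h>

/* Hypothetical reference model of vld2_lane_s8: __a supplies one int8 per
 * output vector, which replaces lane c of b.val[0] and b.val[1]; every other
 * lane is passed through from b unchanged. */
typedef struct {
  int8_t val[2][8];
} s8x8x2_ref;

static s8x8x2_ref vld2_lane_s8_ref(const int8_t *a, s8x8x2_ref b, int c) {
  b.val[0][c] = a[0]; /* first de-interleaved vector gets a[0] at lane c */
  b.val[1][c] = a[1]; /* second de-interleaved vector gets a[1] at lane c */
  return b;
}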