feat: Add vp[max|min]nm[q]_[f32|f64]
howjmay committed Aug 2, 2024
1 parent e3bd6a9 commit fddef58
Showing 5 changed files with 242 additions and 34 deletions.
112 changes: 90 additions & 22 deletions neon2rvv.h
@@ -4278,8 +4278,8 @@ FORCE_INLINE int8x16_t vpmaxq_s8(int8x16_t a, int8x16_t b) {
vint8m2_t b_m2 = __riscv_vlmul_ext_v_i8m1_i8m2(b);
vint8m2_t ab = __riscv_vslideup_vx_i8m2(a_m2, b_m2, 16, 32);
vint8m2_t ab_s = __riscv_vslidedown_vx_i8m2(ab, 1, 32);
- vint8m2_t ab_min = __riscv_vmax_vv_i8m2(ab, ab_s, 32);
- return __riscv_vlmul_trunc_v_i8m2_i8m1(__riscv_vcompress_vm_i8m2(ab_min, mask, 32));
+ vint8m2_t ab_max = __riscv_vmax_vv_i8m2(ab, ab_s, 32);
+ return __riscv_vlmul_trunc_v_i8m2_i8m1(__riscv_vcompress_vm_i8m2(ab_max, mask, 32));
}

FORCE_INLINE int16x8_t vpmaxq_s16(int16x8_t a, int16x8_t b) {
@@ -4288,8 +4288,8 @@ FORCE_INLINE int16x8_t vpmaxq_s16(int16x8_t a, int16x8_t b) {
vint16m2_t b_m2 = __riscv_vlmul_ext_v_i16m1_i16m2(b);
vint16m2_t ab = __riscv_vslideup_vx_i16m2(a_m2, b_m2, 8, 16);
vint16m2_t ab_s = __riscv_vslidedown_vx_i16m2(ab, 1, 16);
- vint16m2_t ab_min = __riscv_vmax_vv_i16m2(ab, ab_s, 16);
- return __riscv_vlmul_trunc_v_i16m2_i16m1(__riscv_vcompress_vm_i16m2(ab_min, mask, 16));
+ vint16m2_t ab_max = __riscv_vmax_vv_i16m2(ab, ab_s, 16);
+ return __riscv_vlmul_trunc_v_i16m2_i16m1(__riscv_vcompress_vm_i16m2(ab_max, mask, 16));
}

FORCE_INLINE int32x4_t vpmaxq_s32(int32x4_t a, int32x4_t b) {
@@ -4298,8 +4298,8 @@ FORCE_INLINE int32x4_t vpmaxq_s32(int32x4_t a, int32x4_t b) {
vint32m2_t b_m2 = __riscv_vlmul_ext_v_i32m1_i32m2(b);
vint32m2_t ab = __riscv_vslideup_vx_i32m2(a_m2, b_m2, 4, 8);
vint32m2_t ab_s = __riscv_vslidedown_vx_i32m2(ab, 1, 8);
- vint32m2_t ab_min = __riscv_vmax_vv_i32m2(ab, ab_s, 8);
- return __riscv_vlmul_trunc_v_i32m2_i32m1(__riscv_vcompress_vm_i32m2(ab_min, mask, 8));
+ vint32m2_t ab_max = __riscv_vmax_vv_i32m2(ab, ab_s, 8);
+ return __riscv_vlmul_trunc_v_i32m2_i32m1(__riscv_vcompress_vm_i32m2(ab_max, mask, 8));
}

FORCE_INLINE uint8x16_t vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
@@ -4308,8 +4308,8 @@ FORCE_INLINE uint8x16_t vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
vuint8m2_t b_m2 = __riscv_vlmul_ext_v_u8m1_u8m2(b);
vuint8m2_t ab = __riscv_vslideup_vx_u8m2(a_m2, b_m2, 16, 32);
vuint8m2_t ab_s = __riscv_vslidedown_vx_u8m2(ab, 1, 32);
- vuint8m2_t ab_min = __riscv_vmaxu_vv_u8m2(ab, ab_s, 32);
- return __riscv_vlmul_trunc_v_u8m2_u8m1(__riscv_vcompress_vm_u8m2(ab_min, mask, 32));
+ vuint8m2_t ab_max = __riscv_vmaxu_vv_u8m2(ab, ab_s, 32);
+ return __riscv_vlmul_trunc_v_u8m2_u8m1(__riscv_vcompress_vm_u8m2(ab_max, mask, 32));
}

FORCE_INLINE uint16x8_t vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
@@ -4318,8 +4318,8 @@ FORCE_INLINE uint16x8_t vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
vuint16m2_t b_m2 = __riscv_vlmul_ext_v_u16m1_u16m2(b);
vuint16m2_t ab = __riscv_vslideup_vx_u16m2(a_m2, b_m2, 8, 16);
vuint16m2_t ab_s = __riscv_vslidedown_vx_u16m2(ab, 1, 16);
- vuint16m2_t ab_min = __riscv_vmaxu_vv_u16m2(ab, ab_s, 16);
- return __riscv_vlmul_trunc_v_u16m2_u16m1(__riscv_vcompress_vm_u16m2(ab_min, mask, 16));
+ vuint16m2_t ab_max = __riscv_vmaxu_vv_u16m2(ab, ab_s, 16);
+ return __riscv_vlmul_trunc_v_u16m2_u16m1(__riscv_vcompress_vm_u16m2(ab_max, mask, 16));
}

FORCE_INLINE uint32x4_t vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
@@ -4328,8 +4328,8 @@ FORCE_INLINE uint32x4_t vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
vuint32m2_t b_m2 = __riscv_vlmul_ext_v_u32m1_u32m2(b);
vuint32m2_t ab = __riscv_vslideup_vx_u32m2(a_m2, b_m2, 4, 8);
vuint32m2_t ab_s = __riscv_vslidedown_vx_u32m2(ab, 1, 8);
- vuint32m2_t ab_min = __riscv_vmaxu_vv_u32m2(ab, ab_s, 8);
- return __riscv_vlmul_trunc_v_u32m2_u32m1(__riscv_vcompress_vm_u32m2(ab_min, mask, 8));
+ vuint32m2_t ab_max = __riscv_vmaxu_vv_u32m2(ab, ab_s, 8);
+ return __riscv_vlmul_trunc_v_u32m2_u32m1(__riscv_vcompress_vm_u32m2(ab_max, mask, 8));
}

FORCE_INLINE float32x4_t vpmaxq_f32(float32x4_t a, float32x4_t b) {
@@ -4338,8 +4338,8 @@ FORCE_INLINE float32x4_t vpmaxq_f32(float32x4_t a, float32x4_t b) {
vfloat32m2_t b_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(b);
vfloat32m2_t ab = __riscv_vslideup_vx_f32m2(a_m2, b_m2, 4, 8);
vfloat32m2_t ab_s = __riscv_vslidedown_vx_f32m2(ab, 1, 8);
- vfloat32m2_t ab_min = __riscv_vfmax_vv_f32m2(ab, ab_s, 8);
- return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_min, mask, 8));
+ vfloat32m2_t ab_max = __riscv_vfmax_vv_f32m2(ab, ab_s, 8);
+ return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_max, mask, 8));
}

FORCE_INLINE float64x2_t vpmaxq_f64(float64x2_t a, float64x2_t b) {
@@ -4348,8 +4348,8 @@ FORCE_INLINE float64x2_t vpmaxq_f64(float64x2_t a, float64x2_t b) {
vfloat64m2_t b_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(b);
vfloat64m2_t ab = __riscv_vslideup_vx_f64m2(a_m2, b_m2, 2, 4);
vfloat64m2_t ab_s = __riscv_vslidedown_vx_f64m2(ab, 1, 4);
- vfloat64m2_t ab_min = __riscv_vfmax_vv_f64m2(ab, ab_s, 4);
- return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_min, mask, 4));
+ vfloat64m2_t ab_max = __riscv_vfmax_vv_f64m2(ab, ab_s, 4);
+ return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_max, mask, 4));
}
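
Note on the hunks above: only the misnamed ab_min result variables are renamed to ab_max; the computation is unchanged. The sequence is the library's pairwise-reduction idiom: vslideup concatenates b onto a, vslidedown shifts that double-length vector by one lane, vmax folds neighbors, and vcompress keeps the even lanes. A scalar sketch of what the s8 version computes (pairwise_max_i8 is our illustrative name, not a neon2rvv symbol):

#include <stdint.h>

/* Scalar model of vpmaxq_s8's slideup/slidedown/vmax/vcompress sequence. */
static void pairwise_max_i8(const int8_t a[16], const int8_t b[16], int8_t out[16]) {
  int8_t ab[32]; /* vslideup: ab = concat(a, b) */
  for (int i = 0; i < 16; i++) {
    ab[i] = a[i];
    ab[i + 16] = b[i];
  }
  /* vmax(ab, ab slid down by one lane), then vcompress keeps even lanes */
  for (int i = 0; i < 16; i++) {
    int8_t x = ab[2 * i];
    int8_t y = ab[2 * i + 1];
    out[i] = x > y ? x : y;
  }
}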

FORCE_INLINE uint8x8_t vpmax_u8(uint8x8_t a, uint8x8_t b) {
@@ -4488,17 +4488,85 @@ FORCE_INLINE float64x2_t vpminq_f64(float64x2_t a, float64x2_t b) {
return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_min, mask, 4));
}

- // FORCE_INLINE float32x2_t vpmaxnm_f32(float32x2_t a, float32x2_t b);
+ FORCE_INLINE float32x2_t vpmaxnm_f32(float32x2_t a, float32x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_u32m1_b32(vdup_n_u32(85));
+ vfloat32m1_t ab = __riscv_vslideup_vx_f32m1(a, b, 2, 4);
+ vfloat32m1_t ab_s = __riscv_vslidedown_vx_f32m1(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab_s, ab_s, 4);
+ vfloat32m1_t ab_replace = __riscv_vmerge_vvm_f32m1(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat32m1_t ab_s_replace = __riscv_vmerge_vvm_f32m1(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat32m1_t ab_max = __riscv_vfmax_vv_f32m1(ab_replace, ab_s_replace, 4);
+ return __riscv_vcompress_vm_f32m1(ab_max, mask, 4);
+ }
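
The vmfeq/vmerge pair implements the *nm ("number") rule that a quiet NaN loses to a number: vmfeq(x, x) is false exactly on NaN lanes, so each merge substitutes the other operand there, and the final vfmax sees a NaN only when both inputs were NaN. A one-lane scalar sketch (maxnm_lane is our illustrative name):

/* Scalar model of the vmfeq/vmerge/vfmax NaN handling above. */
static inline float maxnm_lane(float x, float y) {
  float xr = (x == x) ? x : y; /* x == x is false only for NaN */
  float yr = (y == y) ? y : x; /* a NaN lane takes the other operand */
  return xr > yr ? xr : yr;    /* both NaN: result stays NaN */
}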

- // FORCE_INLINE float32x4_t vpmaxnmq_f32(float32x4_t a, float32x4_t b);
+ FORCE_INLINE float32x4_t vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
+ vbool16_t mask = __riscv_vreinterpret_v_i8m1_b16(vdupq_n_s8(85));
+ vfloat32m2_t a_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(a);
+ vfloat32m2_t b_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(b);
+ vfloat32m2_t ab = __riscv_vslideup_vx_f32m2(a_m2, b_m2, 4, 8);
+ vfloat32m2_t ab_s = __riscv_vslidedown_vx_f32m2(ab, 1, 8);
+ vbool16_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab, ab, 8);
+ vbool16_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab_s, ab_s, 8);
+ vfloat32m2_t ab_replace = __riscv_vmerge_vvm_f32m2(ab_s, ab, ab_non_nan_mask, 8);
+ vfloat32m2_t ab_s_replace = __riscv_vmerge_vvm_f32m2(ab, ab_s, ab_s_non_nan_mask, 8);
+ vfloat32m2_t ab_max = __riscv_vfmax_vv_f32m2(ab_replace, ab_s_replace, 8);
+ return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_max, mask, 8));
+ }

- // FORCE_INLINE float64x2_t vpmaxnmq_f64(float64x2_t a, float64x2_t b);
+ FORCE_INLINE float64x2_t vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_i8m1_b32(vdupq_n_s8(85));
+ vfloat64m2_t a_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(a);
+ vfloat64m2_t b_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(b);
+ vfloat64m2_t ab = __riscv_vslideup_vx_f64m2(a_m2, b_m2, 2, 4);
+ vfloat64m2_t ab_s = __riscv_vslidedown_vx_f64m2(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab_s, ab_s, 4);
+ vfloat64m2_t ab_replace = __riscv_vmerge_vvm_f64m2(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat64m2_t ab_s_replace = __riscv_vmerge_vvm_f64m2(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat64m2_t ab_max = __riscv_vfmax_vv_f64m2(ab_replace, ab_s_replace, 4);
+ return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_max, mask, 4));
+ }

- // FORCE_INLINE float32x2_t vpminnm_f32(float32x2_t a, float32x2_t b);
+ FORCE_INLINE float32x2_t vpminnm_f32(float32x2_t a, float32x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_u32m1_b32(vdup_n_u32(85));
+ vfloat32m1_t ab = __riscv_vslideup_vx_f32m1(a, b, 2, 4);
+ vfloat32m1_t ab_s = __riscv_vslidedown_vx_f32m1(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab_s, ab_s, 4);
+ vfloat32m1_t ab_replace = __riscv_vmerge_vvm_f32m1(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat32m1_t ab_s_replace = __riscv_vmerge_vvm_f32m1(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat32m1_t ab_min = __riscv_vfmin_vv_f32m1(ab_replace, ab_s_replace, 4);
+ return __riscv_vcompress_vm_f32m1(ab_min, mask, 4);
+ }

- // FORCE_INLINE float32x4_t vpminnmq_f32(float32x4_t a, float32x4_t b);
+ FORCE_INLINE float32x4_t vpminnmq_f32(float32x4_t a, float32x4_t b) {
+ vbool16_t mask = __riscv_vreinterpret_v_i8m1_b16(vdupq_n_s8(85));
+ vfloat32m2_t a_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(a);
+ vfloat32m2_t b_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(b);
+ vfloat32m2_t ab = __riscv_vslideup_vx_f32m2(a_m2, b_m2, 4, 8);
+ vfloat32m2_t ab_s = __riscv_vslidedown_vx_f32m2(ab, 1, 8);
+ vbool16_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab, ab, 8);
+ vbool16_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab_s, ab_s, 8);
+ vfloat32m2_t ab_replace = __riscv_vmerge_vvm_f32m2(ab_s, ab, ab_non_nan_mask, 8);
+ vfloat32m2_t ab_s_replace = __riscv_vmerge_vvm_f32m2(ab, ab_s, ab_s_non_nan_mask, 8);
+ vfloat32m2_t ab_min = __riscv_vfmin_vv_f32m2(ab_replace, ab_s_replace, 8);
+ return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_min, mask, 8));
+ }

- // FORCE_INLINE float64x2_t vpminnmq_f64(float64x2_t a, float64x2_t b);
+ FORCE_INLINE float64x2_t vpminnmq_f64(float64x2_t a, float64x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_i8m1_b32(vdupq_n_s8(85));
+ vfloat64m2_t a_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(a);
+ vfloat64m2_t b_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(b);
+ vfloat64m2_t ab = __riscv_vslideup_vx_f64m2(a_m2, b_m2, 2, 4);
+ vfloat64m2_t ab_s = __riscv_vslidedown_vx_f64m2(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab_s, ab_s, 4);
+ vfloat64m2_t ab_replace = __riscv_vmerge_vvm_f64m2(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat64m2_t ab_s_replace = __riscv_vmerge_vvm_f64m2(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat64m2_t ab_min = __riscv_vfmin_vv_f64m2(ab_replace, ab_s_replace, 4);
+ return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_min, mask, 4));
+ }
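
All six *nm functions reuse the same compress mask as the plain pairwise min/max: an RVV mask register is a bit array in which bit i governs element i, and 85 is 0b01010101, so a register whose bytes are 0x55 selects lanes 0, 2, 4, ... regardless of element width. This is also why vpmaxnm_f32 can use vdup_n_u32(85): at vl = 4 only mask bits 0-3 matter, and the low byte of the u32 pattern already holds 0x55. A sketch of the selection rule (lane_selected is our illustrative name):

#include <stdint.h>

/* Returns 1 when vcompress would keep element i under an all-0x55 mask. */
static int lane_selected(const uint8_t *mask_bytes, int i) {
  return (mask_bytes[i / 8] >> (i % 8)) & 1; /* 1 for even i */
}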

FORCE_INLINE int64_t vpaddd_s64(int64x2_t a) {
return __riscv_vmv_x_s_i64m1_i64(__riscv_vredsum_vs_i64m1_i64m1(a, __riscv_vmv_v_x_i64m1(0, 2), 2));
33 changes: 33 additions & 0 deletions tests/common.cpp
@@ -895,4 +895,37 @@ double bankers_rounding(double val) {
return ret;
}

+ float maxnm(float a, float b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a > b ? a : b;
+ }
+ float minnm(float a, float b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a < b ? a : b;
+ }
+ double maxnm(double a, double b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a > b ? a : b;
+ }
+ double minnm(double a, double b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a < b ? a : b;
+ }
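
For quiet NaNs these reference helpers behave like C99 fmax/fmin, which also return the non-NaN operand and yield NaN only when both inputs are NaN. A usage sketch, assuming common.h is on the include path and the declarations sit in namespace NEON2RVV as the closing brace below suggests:

#include <cassert>
#include <cmath>
#include "common.h" /* assumed include path */

int main() {
  float qnan = std::nanf("");
  assert(NEON2RVV::maxnm(qnan, 1.0f) == 1.0f);     /* NaN operand is ignored */
  assert(NEON2RVV::minnm(2.0f, qnan) == 2.0f);
  assert(std::isnan(NEON2RVV::maxnm(qnan, qnan))); /* both NaN -> NaN */
  return 0;
}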

} // namespace NEON2RVV
5 changes: 5 additions & 0 deletions tests/common.h
@@ -377,6 +377,11 @@ static void merge_arrays(const T *arr1, const T *arr2, const T *arr3, const T *a
float bankers_rounding(float val);
double bankers_rounding(double val);

+ float maxnm(float a, float b);
+ float minnm(float a, float b);
+ double maxnm(double a, double b);
+ double minnm(double a, double b);

#define CHECK_RESULT(EXP) \
if ((EXP) != TEST_SUCCESS) { \
return TEST_FAIL; \
114 changes: 108 additions & 6 deletions tests/impl.cpp
@@ -15775,17 +15775,119 @@ result_t test_vpminq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

- result_t test_vpmaxnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ result_t test_vpmaxnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = maxnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = maxnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
+ float32x2_t a = vld1_f32(_a);
+ float32x2_t b = vld1_f32(_b);
+ float32x2_t c = vpmaxnm_f32(a, b);
+ return validate_float(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }
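
Per the AArch64 pairwise layout, the low result lane reduces a and the high lane reduces b. A worked example with inputs of our choosing:

/* a = {1.0f, NAN}, b = {0.5f, NAN}  (illustrative inputs)
   _c[0] = maxnm(a[0], a[1]) = 1.0f  -- low lane from a
   _c[1] = maxnm(b[0], b[1]) = 0.5f  -- high lane from b */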

+ result_t test_vpmaxnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[4];
+ for (int i = 0; i < 2; i++) {
+ _c[i] = maxnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 2] = maxnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
+ float32x4_t a = vld1q_f32(_a);
+ float32x4_t b = vld1q_f32(_b);
+ float32x4_t c = vpmaxnmq_f32(a, b);
+ return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

+ result_t test_vpmaxnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const double *_a = (const double *)impl.test_cases_float_pointer1;
+ const double *_b = (const double *)impl.test_cases_float_pointer2;
+ double _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = maxnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = maxnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
+ float64x2_t a = vld1q_f64(_a);
+ float64x2_t b = vld1q_f64(_b);
+ float64x2_t c = vpmaxnmq_f64(a, b);
+ return validate_double(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

- result_t test_vpmaxnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ result_t test_vpminnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = minnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = minnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
- result_t test_vpmaxnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ float32x2_t a = vld1_f32(_a);
+ float32x2_t b = vld1_f32(_b);
+ float32x2_t c = vpminnm_f32(a, b);
+ return validate_float(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

+ result_t test_vpminnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[4];
+ for (int i = 0; i < 2; i++) {
+ _c[i] = minnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 2] = minnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
- result_t test_vpminnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ float32x4_t a = vld1q_f32(_a);
+ float32x4_t b = vld1q_f32(_b);
+ float32x4_t c = vpminnmq_f32(a, b);
+ return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

- result_t test_vpminnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ result_t test_vpminnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const double *_a = (const double *)impl.test_cases_float_pointer1;
+ const double *_b = (const double *)impl.test_cases_float_pointer2;
+ double _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = minnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = minnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
- result_t test_vpminnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ float64x2_t a = vld1q_f64(_a);
+ float64x2_t b = vld1q_f64(_b);
+ float64x2_t c = vpminnmq_f64(a, b);
+ return validate_double(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

result_t test_vpaddd_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
12 changes: 6 additions & 6 deletions tests/impl.h
@@ -869,12 +869,12 @@
_(vpminq_u32) \
_(vpminq_f32) \
_(vpminq_f64) \
- /*_(vpmaxnm_f32) */ \
- /*_(vpmaxnmq_f32) */ \
- /*_(vpmaxnmq_f64) */ \
- /*_(vpminnm_f32) */ \
- /*_(vpminnmq_f32) */ \
- /*_(vpminnmq_f64) */ \
+ _(vpmaxnm_f32) \
+ _(vpmaxnmq_f32) \
+ _(vpmaxnmq_f64) \
+ _(vpminnm_f32) \
+ _(vpminnmq_f32) \
+ _(vpminnmq_f64) \
_(vpaddd_s64) \
_(vpaddd_u64) \
_(vpadds_f32) \
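
The impl.h hunk simply uncomments the six entries in the intrinsic X-macro list, which is what registers and dispatches each test. A generic sketch of the pattern (the macro and names here are ours, not the repository's exact ones):

/* X-macro sketch: one _(name) row can expand into an enum entry,
   a declaration, and a dispatch case. */
#define MY_TEST_LIST _(vpmaxnm_f32) _(vpminnm_f32)

#define _(x) it_##x,
enum MyTestIndex { MY_TEST_LIST };
#undef _

/* elsewhere: #define _(x) case it_##x: return test_##x(impl, iter); */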
