feat: Add vp[max|min]nm[q]_[f32|f64]
howjmay committed Aug 2, 2024
1 parent e3bd6a9 commit fddef58
Showing 5 changed files with 242 additions and 34 deletions.
112 changes: 90 additions & 22 deletions neon2rvv.h
@@ -4278,8 +4278,8 @@ FORCE_INLINE int8x16_t vpmaxq_s8(int8x16_t a, int8x16_t b) {
vint8m2_t b_m2 = __riscv_vlmul_ext_v_i8m1_i8m2(b);
vint8m2_t ab = __riscv_vslideup_vx_i8m2(a_m2, b_m2, 16, 32);
vint8m2_t ab_s = __riscv_vslidedown_vx_i8m2(ab, 1, 32);
- vint8m2_t ab_min = __riscv_vmax_vv_i8m2(ab, ab_s, 32);
- return __riscv_vlmul_trunc_v_i8m2_i8m1(__riscv_vcompress_vm_i8m2(ab_min, mask, 32));
+ vint8m2_t ab_max = __riscv_vmax_vv_i8m2(ab, ab_s, 32);
+ return __riscv_vlmul_trunc_v_i8m2_i8m1(__riscv_vcompress_vm_i8m2(ab_max, mask, 32));
}

FORCE_INLINE int16x8_t vpmaxq_s16(int16x8_t a, int16x8_t b) {
@@ -4288,8 +4288,8 @@ FORCE_INLINE int16x8_t vpmaxq_s16(int16x8_t a, int16x8_t b) {
vint16m2_t b_m2 = __riscv_vlmul_ext_v_i16m1_i16m2(b);
vint16m2_t ab = __riscv_vslideup_vx_i16m2(a_m2, b_m2, 8, 16);
vint16m2_t ab_s = __riscv_vslidedown_vx_i16m2(ab, 1, 16);
- vint16m2_t ab_min = __riscv_vmax_vv_i16m2(ab, ab_s, 16);
- return __riscv_vlmul_trunc_v_i16m2_i16m1(__riscv_vcompress_vm_i16m2(ab_min, mask, 16));
+ vint16m2_t ab_max = __riscv_vmax_vv_i16m2(ab, ab_s, 16);
+ return __riscv_vlmul_trunc_v_i16m2_i16m1(__riscv_vcompress_vm_i16m2(ab_max, mask, 16));
}

FORCE_INLINE int32x4_t vpmaxq_s32(int32x4_t a, int32x4_t b) {
@@ -4298,8 +4298,8 @@ FORCE_INLINE int32x4_t vpmaxq_s32(int32x4_t a, int32x4_t b) {
vint32m2_t b_m2 = __riscv_vlmul_ext_v_i32m1_i32m2(b);
vint32m2_t ab = __riscv_vslideup_vx_i32m2(a_m2, b_m2, 4, 8);
vint32m2_t ab_s = __riscv_vslidedown_vx_i32m2(ab, 1, 8);
- vint32m2_t ab_min = __riscv_vmax_vv_i32m2(ab, ab_s, 8);
- return __riscv_vlmul_trunc_v_i32m2_i32m1(__riscv_vcompress_vm_i32m2(ab_min, mask, 8));
+ vint32m2_t ab_max = __riscv_vmax_vv_i32m2(ab, ab_s, 8);
+ return __riscv_vlmul_trunc_v_i32m2_i32m1(__riscv_vcompress_vm_i32m2(ab_max, mask, 8));
}

FORCE_INLINE uint8x16_t vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
@@ -4308,8 +4308,8 @@ FORCE_INLINE uint8x16_t vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
vuint8m2_t b_m2 = __riscv_vlmul_ext_v_u8m1_u8m2(b);
vuint8m2_t ab = __riscv_vslideup_vx_u8m2(a_m2, b_m2, 16, 32);
vuint8m2_t ab_s = __riscv_vslidedown_vx_u8m2(ab, 1, 32);
- vuint8m2_t ab_min = __riscv_vmaxu_vv_u8m2(ab, ab_s, 32);
- return __riscv_vlmul_trunc_v_u8m2_u8m1(__riscv_vcompress_vm_u8m2(ab_min, mask, 32));
+ vuint8m2_t ab_max = __riscv_vmaxu_vv_u8m2(ab, ab_s, 32);
+ return __riscv_vlmul_trunc_v_u8m2_u8m1(__riscv_vcompress_vm_u8m2(ab_max, mask, 32));
}

FORCE_INLINE uint16x8_t vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
@@ -4318,8 +4318,8 @@ FORCE_INLINE uint16x8_t vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
vuint16m2_t b_m2 = __riscv_vlmul_ext_v_u16m1_u16m2(b);
vuint16m2_t ab = __riscv_vslideup_vx_u16m2(a_m2, b_m2, 8, 16);
vuint16m2_t ab_s = __riscv_vslidedown_vx_u16m2(ab, 1, 16);
- vuint16m2_t ab_min = __riscv_vmaxu_vv_u16m2(ab, ab_s, 16);
- return __riscv_vlmul_trunc_v_u16m2_u16m1(__riscv_vcompress_vm_u16m2(ab_min, mask, 16));
+ vuint16m2_t ab_max = __riscv_vmaxu_vv_u16m2(ab, ab_s, 16);
+ return __riscv_vlmul_trunc_v_u16m2_u16m1(__riscv_vcompress_vm_u16m2(ab_max, mask, 16));
}

FORCE_INLINE uint32x4_t vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
@@ -4328,8 +4328,8 @@ FORCE_INLINE uint32x4_t vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
vuint32m2_t b_m2 = __riscv_vlmul_ext_v_u32m1_u32m2(b);
vuint32m2_t ab = __riscv_vslideup_vx_u32m2(a_m2, b_m2, 4, 8);
vuint32m2_t ab_s = __riscv_vslidedown_vx_u32m2(ab, 1, 8);
- vuint32m2_t ab_min = __riscv_vmaxu_vv_u32m2(ab, ab_s, 8);
- return __riscv_vlmul_trunc_v_u32m2_u32m1(__riscv_vcompress_vm_u32m2(ab_min, mask, 8));
+ vuint32m2_t ab_max = __riscv_vmaxu_vv_u32m2(ab, ab_s, 8);
+ return __riscv_vlmul_trunc_v_u32m2_u32m1(__riscv_vcompress_vm_u32m2(ab_max, mask, 8));
}

FORCE_INLINE float32x4_t vpmaxq_f32(float32x4_t a, float32x4_t b) {
@@ -4338,8 +4338,8 @@ FORCE_INLINE float32x4_t vpmaxq_f32(float32x4_t a, float32x4_t b) {
vfloat32m2_t b_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(b);
vfloat32m2_t ab = __riscv_vslideup_vx_f32m2(a_m2, b_m2, 4, 8);
vfloat32m2_t ab_s = __riscv_vslidedown_vx_f32m2(ab, 1, 8);
- vfloat32m2_t ab_min = __riscv_vfmax_vv_f32m2(ab, ab_s, 8);
- return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_min, mask, 8));
+ vfloat32m2_t ab_max = __riscv_vfmax_vv_f32m2(ab, ab_s, 8);
+ return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_max, mask, 8));
}

FORCE_INLINE float64x2_t vpmaxq_f64(float64x2_t a, float64x2_t b) {
@@ -4348,8 +4348,8 @@ FORCE_INLINE float64x2_t vpmaxq_f64(float64x2_t a, float64x2_t b) {
vfloat64m2_t b_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(b);
vfloat64m2_t ab = __riscv_vslideup_vx_f64m2(a_m2, b_m2, 2, 4);
vfloat64m2_t ab_s = __riscv_vslidedown_vx_f64m2(ab, 1, 4);
- vfloat64m2_t ab_min = __riscv_vfmax_vv_f64m2(ab, ab_s, 4);
- return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_min, mask, 4));
+ vfloat64m2_t ab_max = __riscv_vfmax_vv_f64m2(ab, ab_s, 4);
+ return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_max, mask, 4));
}
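
Note on the hunks above: only the misnamed ab_min result variables are renamed to ab_max; the computation is unchanged. The sequence is the library's pairwise-reduction idiom: vslideup concatenates b onto a, vslidedown shifts that double-length vector by one lane, vmax folds neighbors, and vcompress keeps the even lanes. A scalar sketch of what the s8 version computes (pairwise_max_i8 is our illustrative name, not a neon2rvv symbol):

#include <stdint.h>

/* Scalar model of vpmaxq_s8's slideup/slidedown/vmax/vcompress sequence. */
static void pairwise_max_i8(const int8_t a[16], const int8_t b[16], int8_t out[16]) {
  int8_t ab[32]; /* vslideup: ab = concat(a, b) */
  for (int i = 0; i < 16; i++) {
    ab[i] = a[i];
    ab[i + 16] = b[i];
  }
  /* vmax(ab, ab slid down by one lane), then vcompress keeps even lanes */
  for (int i = 0; i < 16; i++) {
    int8_t x = ab[2 * i];
    int8_t y = ab[2 * i + 1];
    out[i] = x > y ? x : y;
  }
}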

FORCE_INLINE uint8x8_t vpmax_u8(uint8x8_t a, uint8x8_t b) {
@@ -4488,17 +4488,85 @@ FORCE_INLINE float64x2_t vpminq_f64(float64x2_t a, float64x2_t b) {
return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_min, mask, 4));
}

- // FORCE_INLINE float32x2_t vpmaxnm_f32(float32x2_t a, float32x2_t b);
+ FORCE_INLINE float32x2_t vpmaxnm_f32(float32x2_t a, float32x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_u32m1_b32(vdup_n_u32(85));
+ vfloat32m1_t ab = __riscv_vslideup_vx_f32m1(a, b, 2, 4);
+ vfloat32m1_t ab_s = __riscv_vslidedown_vx_f32m1(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab_s, ab_s, 4);
+ vfloat32m1_t ab_replace = __riscv_vmerge_vvm_f32m1(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat32m1_t ab_s_replace = __riscv_vmerge_vvm_f32m1(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat32m1_t ab_max = __riscv_vfmax_vv_f32m1(ab_replace, ab_s_replace, 4);
+ return __riscv_vcompress_vm_f32m1(ab_max, mask, 4);
+ }
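
The vmfeq/vmerge pair implements the *nm ("number") rule that a quiet NaN loses to a number: vmfeq(x, x) is false exactly on NaN lanes, so each merge substitutes the other operand there, and the final vfmax sees a NaN only when both inputs were NaN. A one-lane scalar sketch (maxnm_lane is our illustrative name):

/* Scalar model of the vmfeq/vmerge/vfmax NaN handling above. */
static inline float maxnm_lane(float x, float y) {
  float xr = (x == x) ? x : y; /* x == x is false only for NaN */
  float yr = (y == y) ? y : x; /* a NaN lane takes the other operand */
  return xr > yr ? xr : yr;    /* both NaN: result stays NaN */
}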

- // FORCE_INLINE float32x4_t vpmaxnmq_f32(float32x4_t a, float32x4_t b);
+ FORCE_INLINE float32x4_t vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
+ vbool16_t mask = __riscv_vreinterpret_v_i8m1_b16(vdupq_n_s8(85));
+ vfloat32m2_t a_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(a);
+ vfloat32m2_t b_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(b);
+ vfloat32m2_t ab = __riscv_vslideup_vx_f32m2(a_m2, b_m2, 4, 8);
+ vfloat32m2_t ab_s = __riscv_vslidedown_vx_f32m2(ab, 1, 8);
+ vbool16_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab, ab, 8);
+ vbool16_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab_s, ab_s, 8);
+ vfloat32m2_t ab_replace = __riscv_vmerge_vvm_f32m2(ab_s, ab, ab_non_nan_mask, 8);
+ vfloat32m2_t ab_s_replace = __riscv_vmerge_vvm_f32m2(ab, ab_s, ab_s_non_nan_mask, 8);
+ vfloat32m2_t ab_max = __riscv_vfmax_vv_f32m2(ab_replace, ab_s_replace, 8);
+ return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_max, mask, 8));
+ }

- // FORCE_INLINE float64x2_t vpmaxnmq_f64(float64x2_t a, float64x2_t b);
+ FORCE_INLINE float64x2_t vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_i8m1_b32(vdupq_n_s8(85));
+ vfloat64m2_t a_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(a);
+ vfloat64m2_t b_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(b);
+ vfloat64m2_t ab = __riscv_vslideup_vx_f64m2(a_m2, b_m2, 2, 4);
+ vfloat64m2_t ab_s = __riscv_vslidedown_vx_f64m2(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab_s, ab_s, 4);
+ vfloat64m2_t ab_replace = __riscv_vmerge_vvm_f64m2(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat64m2_t ab_s_replace = __riscv_vmerge_vvm_f64m2(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat64m2_t ab_max = __riscv_vfmax_vv_f64m2(ab_replace, ab_s_replace, 4);
+ return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_max, mask, 4));
+ }

- // FORCE_INLINE float32x2_t vpminnm_f32(float32x2_t a, float32x2_t b);
+ FORCE_INLINE float32x2_t vpminnm_f32(float32x2_t a, float32x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_u32m1_b32(vdup_n_u32(85));
+ vfloat32m1_t ab = __riscv_vslideup_vx_f32m1(a, b, 2, 4);
+ vfloat32m1_t ab_s = __riscv_vslidedown_vx_f32m1(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m1_b32(ab_s, ab_s, 4);
+ vfloat32m1_t ab_replace = __riscv_vmerge_vvm_f32m1(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat32m1_t ab_s_replace = __riscv_vmerge_vvm_f32m1(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat32m1_t ab_min = __riscv_vfmin_vv_f32m1(ab_replace, ab_s_replace, 4);
+ return __riscv_vcompress_vm_f32m1(ab_min, mask, 4);
+ }

- // FORCE_INLINE float32x4_t vpminnmq_f32(float32x4_t a, float32x4_t b);
+ FORCE_INLINE float32x4_t vpminnmq_f32(float32x4_t a, float32x4_t b) {
+ vbool16_t mask = __riscv_vreinterpret_v_i8m1_b16(vdupq_n_s8(85));
+ vfloat32m2_t a_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(a);
+ vfloat32m2_t b_m2 = __riscv_vlmul_ext_v_f32m1_f32m2(b);
+ vfloat32m2_t ab = __riscv_vslideup_vx_f32m2(a_m2, b_m2, 4, 8);
+ vfloat32m2_t ab_s = __riscv_vslidedown_vx_f32m2(ab, 1, 8);
+ vbool16_t ab_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab, ab, 8);
+ vbool16_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f32m2_b16(ab_s, ab_s, 8);
+ vfloat32m2_t ab_replace = __riscv_vmerge_vvm_f32m2(ab_s, ab, ab_non_nan_mask, 8);
+ vfloat32m2_t ab_s_replace = __riscv_vmerge_vvm_f32m2(ab, ab_s, ab_s_non_nan_mask, 8);
+ vfloat32m2_t ab_min = __riscv_vfmin_vv_f32m2(ab_replace, ab_s_replace, 8);
+ return __riscv_vlmul_trunc_v_f32m2_f32m1(__riscv_vcompress_vm_f32m2(ab_min, mask, 8));
+ }

- // FORCE_INLINE float64x2_t vpminnmq_f64(float64x2_t a, float64x2_t b);
+ FORCE_INLINE float64x2_t vpminnmq_f64(float64x2_t a, float64x2_t b) {
+ vbool32_t mask = __riscv_vreinterpret_v_i8m1_b32(vdupq_n_s8(85));
+ vfloat64m2_t a_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(a);
+ vfloat64m2_t b_m2 = __riscv_vlmul_ext_v_f64m1_f64m2(b);
+ vfloat64m2_t ab = __riscv_vslideup_vx_f64m2(a_m2, b_m2, 2, 4);
+ vfloat64m2_t ab_s = __riscv_vslidedown_vx_f64m2(ab, 1, 4);
+ vbool32_t ab_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab, ab, 4);
+ vbool32_t ab_s_non_nan_mask = __riscv_vmfeq_vv_f64m2_b32(ab_s, ab_s, 4);
+ vfloat64m2_t ab_replace = __riscv_vmerge_vvm_f64m2(ab_s, ab, ab_non_nan_mask, 4);
+ vfloat64m2_t ab_s_replace = __riscv_vmerge_vvm_f64m2(ab, ab_s, ab_s_non_nan_mask, 4);
+ vfloat64m2_t ab_min = __riscv_vfmin_vv_f64m2(ab_replace, ab_s_replace, 4);
+ return __riscv_vlmul_trunc_v_f64m2_f64m1(__riscv_vcompress_vm_f64m2(ab_min, mask, 4));
+ }
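
All six *nm functions reuse the same compress mask as the plain pairwise min/max: an RVV mask register is a bit array in which bit i governs element i, and 85 is 0b01010101, so a register whose bytes are 0x55 selects lanes 0, 2, 4, ... regardless of element width. This is also why vpmaxnm_f32 can use vdup_n_u32(85): at vl = 4 only mask bits 0-3 matter, and the low byte of the u32 pattern already holds 0x55. A sketch of the selection rule (lane_selected is our illustrative name):

#include <stdint.h>

/* Returns 1 when vcompress would keep element i under an all-0x55 mask. */
static int lane_selected(const uint8_t *mask_bytes, int i) {
  return (mask_bytes[i / 8] >> (i % 8)) & 1; /* 1 for even i */
}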

FORCE_INLINE int64_t vpaddd_s64(int64x2_t a) {
return __riscv_vmv_x_s_i64m1_i64(__riscv_vredsum_vs_i64m1_i64m1(a, __riscv_vmv_v_x_i64m1(0, 2), 2));
33 changes: 33 additions & 0 deletions tests/common.cpp
@@ -895,4 +895,37 @@ double bankers_rounding(double val) {
return ret;
}

+ float maxnm(float a, float b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a > b ? a : b;
+ }
+ float minnm(float a, float b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a < b ? a : b;
+ }
+ double maxnm(double a, double b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a > b ? a : b;
+ }
+ double minnm(double a, double b) {
+ if (std::isnan(a) && !std::isnan(b)) {
+ a = b;
+ } else if (!std::isnan(a) && std::isnan(b)) {
+ b = a;
+ }
+ return a < b ? a : b;
+ }
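
For quiet NaNs these reference helpers behave like C99 fmax/fmin, which also return the non-NaN operand and yield NaN only when both inputs are NaN. A usage sketch, assuming common.h is on the include path and the declarations sit in namespace NEON2RVV as the closing brace below suggests:

#include <cassert>
#include <cmath>
#include "common.h" /* assumed include path */

int main() {
  float qnan = std::nanf("");
  assert(NEON2RVV::maxnm(qnan, 1.0f) == 1.0f);     /* NaN operand is ignored */
  assert(NEON2RVV::minnm(2.0f, qnan) == 2.0f);
  assert(std::isnan(NEON2RVV::maxnm(qnan, qnan))); /* both NaN -> NaN */
  return 0;
}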

} // namespace NEON2RVV
5 changes: 5 additions & 0 deletions tests/common.h
@@ -377,6 +377,11 @@ static void merge_arrays(const T *arr1, const T *arr2, const T *arr3, const T *a
float bankers_rounding(float val);
double bankers_rounding(double val);

+ float maxnm(float a, float b);
+ float minnm(float a, float b);
+ double maxnm(double a, double b);
+ double minnm(double a, double b);

#define CHECK_RESULT(EXP) \
if ((EXP) != TEST_SUCCESS) { \
return TEST_FAIL; \
114 changes: 108 additions & 6 deletions tests/impl.cpp
@@ -15775,17 +15775,119 @@ result_t test_vpminq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

- result_t test_vpmaxnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ result_t test_vpmaxnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = maxnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = maxnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
+ float32x2_t a = vld1_f32(_a);
+ float32x2_t b = vld1_f32(_b);
+ float32x2_t c = vpmaxnm_f32(a, b);
+ return validate_float(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }
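
Per the AArch64 pairwise layout, the low result lane reduces a and the high lane reduces b. A worked example with inputs of our choosing:

/* a = {1.0f, NAN}, b = {0.5f, NAN}  (illustrative inputs)
   _c[0] = maxnm(a[0], a[1]) = 1.0f  -- low lane from a
   _c[1] = maxnm(b[0], b[1]) = 0.5f  -- high lane from b */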

+ result_t test_vpmaxnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[4];
+ for (int i = 0; i < 2; i++) {
+ _c[i] = maxnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 2] = maxnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
+ float32x4_t a = vld1q_f32(_a);
+ float32x4_t b = vld1q_f32(_b);
+ float32x4_t c = vpmaxnmq_f32(a, b);
+ return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

+ result_t test_vpmaxnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const double *_a = (const double *)impl.test_cases_float_pointer1;
+ const double *_b = (const double *)impl.test_cases_float_pointer2;
+ double _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = maxnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = maxnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
+ float64x2_t a = vld1q_f64(_a);
+ float64x2_t b = vld1q_f64(_b);
+ float64x2_t c = vpmaxnmq_f64(a, b);
+ return validate_double(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

- result_t test_vpmaxnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ result_t test_vpminnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = minnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = minnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
- result_t test_vpmaxnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ float32x2_t a = vld1_f32(_a);
+ float32x2_t b = vld1_f32(_b);
+ float32x2_t c = vpminnm_f32(a, b);
+ return validate_float(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

+ result_t test_vpminnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const float *_a = (const float *)impl.test_cases_float_pointer1;
+ const float *_b = (const float *)impl.test_cases_float_pointer2;
+ float _c[4];
+ for (int i = 0; i < 2; i++) {
+ _c[i] = minnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 2] = minnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
- result_t test_vpminnm_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ float32x4_t a = vld1q_f32(_a);
+ float32x4_t b = vld1q_f32(_b);
+ float32x4_t c = vpminnmq_f32(a, b);
+ return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

- result_t test_vpminnmq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ result_t test_vpminnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+ #ifdef ENABLE_TEST_ALL
+ const double *_a = (const double *)impl.test_cases_float_pointer1;
+ const double *_b = (const double *)impl.test_cases_float_pointer2;
+ double _c[2];
+ for (int i = 0; i < 1; i++) {
+ _c[i] = minnm(_a[2 * i], _a[2 * i + 1]);
+ _c[i + 1] = minnm(_b[2 * i], _b[2 * i + 1]);
+ }
+
- result_t test_vpminnmq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+ float64x2_t a = vld1q_f64(_a);
+ float64x2_t b = vld1q_f64(_b);
+ float64x2_t c = vpminnmq_f64(a, b);
+ return validate_double(c, _c[0], _c[1]);
+ #else
+ return TEST_UNIMPL;
+ #endif // ENABLE_TEST_ALL
+ }

result_t test_vpaddd_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
12 changes: 6 additions & 6 deletions tests/impl.h
@@ -869,12 +869,12 @@
_(vpminq_u32) \
_(vpminq_f32) \
_(vpminq_f64) \
- /*_(vpmaxnm_f32) */ \
- /*_(vpmaxnmq_f32) */ \
- /*_(vpmaxnmq_f64) */ \
- /*_(vpminnm_f32) */ \
- /*_(vpminnmq_f32) */ \
- /*_(vpminnmq_f64) */ \
+ _(vpmaxnm_f32) \
+ _(vpmaxnmq_f32) \
+ _(vpmaxnmq_f64) \
+ _(vpminnm_f32) \
+ _(vpminnmq_f32) \
+ _(vpminnmq_f64) \
_(vpaddd_s64) \
_(vpaddd_u64) \
_(vpadds_f32) \
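
The impl.h hunk simply uncomments the six entries in the intrinsic X-macro list, which is what registers and dispatches each test. A generic sketch of the pattern (the macro and names here are ours, not the repository's exact ones):

/* X-macro sketch: one _(name) row can expand into an enum entry,
   a declaration, and a dispatch case. */
#define MY_TEST_LIST _(vpmaxnm_f32) _(vpminnm_f32)

#define _(x) it_##x,
enum MyTestIndex { MY_TEST_LIST };
#undef _

/* elsewhere: #define _(x) case it_##x: return test_##x(impl, iter); */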
