From 9e1d1140668e31addb5881fe205acc05a8e3346d Mon Sep 17 00:00:00 2001
From: PingTakPeterTang <ptpt@rivosinc.com>
Date: Wed, 6 Nov 2024 06:48:55 -0800
Subject: [PATCH 1/2] enhance test infra and testing; fixed 2 bugs caught by
 extra tests

---
 include/rvvlm_powD.inc.h       |   4 +-
 include/rvvlm_sinandcosD.inc.h |   1 +
 include/rvvlm_sincosD.inc.h    |   1 +
 include/rvvlm_tanD.inc.h       |   1 +
 test/include/near_NPiby2_tbl.h |  69 ++++++++++++++++++++
 test/include/test_infra.h      |  62 +++++++++++++++++-
 test/src/test_cos.cpp          |  14 +++-
 test/src/test_exp.cpp          |  11 +++-
 test/src/test_exp10.cpp        |  11 +++-
 test/src/test_exp2.cpp         |  11 +++-
 test/src/test_expm1.cpp        |  11 +++-
 test/src/test_infra.cpp        | 115 ++++++++++++++++++++++++++++++++-
 test/src/test_log.cpp          |   8 +--
 test/src/test_log10.cpp        |   8 +--
 test/src/test_log1p.cpp        |   8 +--
 test/src/test_log2.cpp         |   8 +--
 test/src/test_pow.cpp          |  57 +++++++++++++---
 test/src/test_sin.cpp          |  19 ++++--
 test/src/test_sincos.cpp       |  11 ++++
 test/src/test_tan.cpp          |  14 +++-
 20 files changed, 397 insertions(+), 47 deletions(-)
 create mode 100644 test/include/near_NPiby2_tbl.h

diff --git a/include/rvvlm_powD.inc.h b/include/rvvlm_powD.inc.h
index 72b7f8d..a71d38e 100644
--- a/include/rvvlm_powD.inc.h
+++ b/include/rvvlm_powD.inc.h
@@ -92,10 +92,12 @@
         vy = __riscv_vfmin_mu(current_cases, vy, vy, 0x1.0p53, vlen);          \
         vy = __riscv_vfmax_mu(current_cases, vy, vy, -0x1.0p53, vlen);         \
         VINT y_to_int = __riscv_vfcvt_x(current_cases, vy, vlen);              \
-        /* TODO: y_to_int_fp and y_is_int need to be used */                   \
         VFLOAT y_to_int_fp = __riscv_vfcvt_f(current_cases, y_to_int, vlen);   \
         VBOOL y_is_int = __riscv_vmfeq(current_cases, vy, y_to_int_fp, vlen);  \
         VINT sign_z = __riscv_vsll(y_to_int, 63, vlen);                        \
+        /* a non-integer y has no parity: zero sign_z on those lanes */        \
+        VINT zero = __riscv_vxor(sign_z, sign_z, vlen);                        \
+        sign_z = __riscv_vmerge(zero, sign_z, y_is_int, vlen);                 \
         /* the parity is used later on to manipulate sign, hence sll 63 bits   \
          */                                                                    \
                                                                                \
diff --git a/include/rvvlm_sinandcosD.inc.h b/include/rvvlm_sinandcosD.inc.h
index c1a4675..5c37e76 100644
--- a/include/rvvlm_sinandcosD.inc.h
+++ b/include/rvvlm_sinandcosD.inc.h
@@ -67,6 +67,7 @@ void F_VER1(API) {
       VFLOAT S = __riscv_vfsub(r_hi, A, vlen);
       VFLOAT s = __riscv_vfsub(r_hi, S, vlen);
       s = __riscv_vfsub(s, A, vlen);
+      s = __riscv_vfsub(s, a, vlen);
       s = __riscv_vfnmsac(s, PIBY2_LO, n_flt, vlen);
       r = __riscv_vmerge(r, S, r_small, vlen);
       r_delta = __riscv_vmerge(r_delta, s, r_small, vlen);
diff --git a/include/rvvlm_sincosD.inc.h b/include/rvvlm_sincosD.inc.h
index 05b5f85..5209676 100644
--- a/include/rvvlm_sincosD.inc.h
+++ b/include/rvvlm_sincosD.inc.h
@@ -78,6 +78,7 @@ void F_VER1(API) {
       VFLOAT S = __riscv_vfsub(r_hi, A, vlen);
       VFLOAT s = __riscv_vfsub(r_hi, S, vlen);
       s = __riscv_vfsub(s, A, vlen);
+      s = __riscv_vfsub(s, a, vlen);
       s = __riscv_vfnmsac(s, PIBY2_LO, n_flt, vlen);
       r = __riscv_vmerge(r, S, r_small, vlen);
       r_delta = __riscv_vmerge(r_delta, s, r_small, vlen);
diff --git a/include/rvvlm_tanD.inc.h b/include/rvvlm_tanD.inc.h
index 876c53a..9980abe 100644
--- a/include/rvvlm_tanD.inc.h
+++ b/include/rvvlm_tanD.inc.h
@@ -66,6 +66,7 @@ void F_VER1(API) {
       VFLOAT S = __riscv_vfsub(r_hi, A, vlen);
       VFLOAT s = __riscv_vfsub(r_hi, S, vlen);
       s = __riscv_vfsub(s, A, vlen);
+      s = __riscv_vfsub(s, a, vlen);
       s = __riscv_vfnmsac(s, PIBY2_LO, n_flt, vlen);
       r = __riscv_vmerge(r, S, r_small, vlen);
       r_delta = __riscv_vmerge(r_delta, s, r_small, vlen);
diff --git a/test/include/near_NPiby2_tbl.h b/test/include/near_NPiby2_tbl.h
new file mode 100644
index 0000000..3ca7f6b
--- /dev/null
+++ b/test/include/near_NPiby2_tbl.h
@@ -0,0 +1,69 @@
+// SPDX-FileCopyrightText: 2024 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#define NB_TEST_ARGS (63 * 3) // 63 rows of 3 entries in the table below
+static const double dbl_near_NPiby2_tbl[NB_TEST_ARGS] = {
+    0x1.6c6cbc45dc8dep+9,   0x1.6c6cbc45dc8dep+10,  0x1.6c6cbc45dc8dep+11,
+    0x1.6c6cbc45dc8dep+12,  0x1.635e3d74befcap+14,  0x1.67e57cdd4dc54p+15,
+    0x1.65a1dd290660fp+16,  0x1.bf9b3c6059d24p+17,  0x1.39c6fd67805a7p+18,
+    0x1.39c6fd67805a7p+19,  0x1.9eb7148f354d6p+20,  0x1.9eb7148f354d6p+20,
+    0x1.d3ecce1f28274p+22,  0x1.b951f1572eba5p+23,  0x1.b951f1572eba5p+24,
+    0x1.b951f1572eba5p+25,  0x1.b951f1572eba5p+26,  0x1.b951f1572eba5p+27,
+    0x1.b951f1572eba5p+28,  0x1.b951f1572eba5p+29,  0x1.b951f1572eba5p+30,
+    0x1.b951f1572eba5p+31,  0x1.5c9508c58aafap+32,  0x1.5c9508c58aafap+32,
+    0x1.de5e5054e921bp+34,  0x1.46546a5bd73ccp+35,  0x1.46546a5bd73ccp+35,
+    0x1.46546a5bd73ccp+35,  0x1.bb23eaa3db16dp+38,  0x1.065c829d68730p+39,
+    0x1.065c829d68730p+39,  0x1.065c829d68730p+39,  0x1.065c829d68730p+39,
+    0x1.065c829d68730p+39,  0x1.065c829d68730p+40,  0x1.065c829d68730p+41,
+    0x1.f42d52c35675dp+46,  0x1.7512069b7430dp+47,  0x1.7512069b7430dp+48,
+    0x1.7512069b7430dp+49,  0x1.44630cc2cad9dp+50,  0x1.5cba89af1f855p+51,
+    0x1.5cba89af1f855p+52,  0x1.56a4aa740a5a7p+53,  0x1.59af9a1194efep+54,
+    0x1.59af9a1194efep+54,  0x1.ae9608c734e12p+56,  0x1.ae9608c734e12p+56,
+    0x1.c3cfa4749cdd7p+58,  0x1.c3cfa4749cdd7p+59,  0x1.c3cfa4749cdd7p+60,
+    0x1.c3cfa4749cdd7p+61,  0x1.ed814f0220525p+62,  0x1.ed814f0220525p+63,
+    0x1.a7f3bc5a7ed9ep+64,  0x1.a7f3bc5a7ed9ep+64,  0x1.a7f3bc5a7ed9ep+65,
+    0x1.782b7a20df6d4p+67,  0x1.782b7a20df6d4p+67,  0x1.782b7a20df6d4p+67,
+    0x1.782b7a20df6d4p+68,  0x1.782b7a20df6d4p+69,  0x1.782b7a20df6d4p+70,
+    0x1.782b7a20df6d4p+71,  0x1.a4ffb379a019cp+74,  0x1.308ab84507e83p+75,
+    0x1.99caa5236feeap+76,  0x1.99caa5236feeap+76,  0x1.99caa5236feeap+77,
+    0x1.99caa5236feeap+78,  0x1.99caa5236feeap+79,  0x1.e0664dbedfec5p+81,
+    0x1.e0664dbedfec5p+82,  0x1.e0664dbedfec5p+83,  0x1.e0664dbedfec5p+84,
+    0x1.f19e5d71b26bap+85,  0x1.f19e5d71b26bap+85,  0x1.f19e5d71b26bap+86,
+    0x1.f19e5d71b26bap+87,  0x1.d4ab7990f194dp+89,  0x1.66bd5424e5655p+90,
+    0x1.66bd5424e5655p+91,  0x1.66bd5424e5655p+92,  0x1.66bd5424e5655p+93,
+    0x1.66bd5424e5655p+94,  0x1.c3dc1a597f202p+95,  0x1.c3dc1a597f202p+95,
+    0x1.c3dc1a597f202p+96,  0x1.e50fec1788957p+98,  0x1.e50fec1788957p+99,
+    0x1.e50fec1788957p+100, 0x1.e50fec1788957p+101, 0x1.77d794c891d1cp+102,
+    0x1.dae3a00451b5fp+103, 0x1.dae3a00451b5fp+104, 0x1.1c6cc29b3b153p+105,
+    0x1.d99e1681eada0p+106, 0x1.d99e1681eada0p+106, 0x1.d99e1681eada0p+106,
+    0x1.d99e1681eada0p+106, 0x1.d99e1681eada0p+106, 0x1.d99e1681eada0p+106,
+    0x1.f8d36a8915598p+112, 0x1.ae04fdb542be8p+113, 0x1.ae04fdb542be8p+113,
+    0x1.ae04fdb542be8p+113, 0x1.ae04fdb542be8p+113, 0x1.ae04fdb542be8p+114,
+    0x1.ed82892c91569p+118, 0x1.47623428452f0p+119, 0x1.47623428452f0p+119,
+    0x1.47623428452f0p+119, 0x1.f7e7a76e07787p+122, 0x1.f7e7a76e07787p+123,
+    0x1.e7426933f5760p+124, 0x1.e7426933f5760p+124, 0x1.e7426933f5760p+124,
+    0x1.e7426933f5760p+124, 0x1.e7426933f5760p+124, 0x1.e7426933f5760p+124,
+    0x1.f91a847a07755p+130, 0x1.504cac51f1eafp+131, 0x1.504cac51f1eafp+132,
+    0x1.504cac51f1eafp+133, 0x1.504cac51f1eafp+134, 0x1.504cac51f1eafp+135,
+    0x1.504cac51f1eafp+136, 0x1.8a56c23d2dfe0p+137, 0x1.8a56c23d2dfe0p+137,
+    0x1.b2e25ce13d6a7p+139, 0x1.e42d3528e32a3p+140, 0x1.fcd2a14cb60a1p+141,
+    0x1.fcd2a14cb60a1p+142, 0x1.5ad5a62cb1cc9p+143, 0x1.5ad5a62cb1cc9p+144,
+    0x1.5ad5a62cb1cc9p+145, 0x1.5ad5a62cb1cc9p+146, 0x1.f1cdf37f20ff1p+147,
+    0x1.a651ccd5e965dp+148, 0x1.a651ccd5e965dp+149, 0x1.b930d680374c2p+150,
+    0x1.b930d680374c2p+150, 0x1.b930d680374c2p+151, 0x1.b930d680374c2p+152,
+    0x1.641bcd12e3311p+154, 0x1.205a1c297f6b9p+155, 0x1.205a1c297f6b9p+156,
+    0x1.205a1c297f6b9p+157, 0x1.17e1e60c52f2ep+158, 0x1.17e1e60c52f2ep+158,
+    0x1.ee076da427656p+160, 0x1.82f4a9d83d2c2p+161, 0x1.82f4a9d83d2c2p+161,
+    0x1.682ff8e5429ddp+163, 0x1.682ff8e5429ddp+164, 0x1.682ff8e5429ddp+165,
+    0x1.682ff8e5429ddp+166, 0x1.cca7515e2585cp+167, 0x1.cca7515e2585cp+167,
+    0x1.bdf4d57f7f0c4p+169, 0x1.c54e136ed2490p+170, 0x1.c54e136ed2490p+170,
+    0x1.c54e136ed2490p+170, 0x1.fc218660d7c2fp+173, 0x1.fc218660d7c2fp+174,
+    0x1.d28d5c4c5e73bp+175, 0x1.d28d5c4c5e73bp+176, 0x1.d28d5c4c5e73bp+177,
+    0x1.2722f5d698acep+178, 0x1.2722f5d698acep+178, 0x1.2722f5d698acep+179,
+    0x1.76e1ee2edd343p+181, 0x1.0539b48d14c55p+182, 0x1.0539b48d14c55p+183,
+    0x1.0539b48d14c55p+184, 0x1.0539b48d14c55p+185, 0x1.6014d4a7e0086p+186,
+    0x1.b5441ee104c98p+187, 0x1.b5441ee104c98p+187, 0x1.f7fd817cb39b5p+189,
+    0x1.f7fd817cb39b5p+190, 0x1.bb4c8e40cbe5dp+191, 0x1.bb4c8e40cbe5dp+192,
+    0x1.3ecf49b990c61p+193, 0x1.0090a775f3363p+194, 0x1.1faff897c1fe2p+195,
+    0x1.1faff897c1fe2p+195, 0x1.e7e44a78ac18cp+197, 0x1.e7e44a78ac18cp+197};
diff --git a/test/include/test_infra.h b/test/include/test_infra.h
index ff3859c..0ed300e 100644
--- a/test/include/test_infra.h
+++ b/test/include/test_infra.h
@@ -6,8 +6,53 @@
 
 #include <stdio.h>
 
+#define DBL_NEG_INF (int64_t)0xFFF0000000000000
+#define DBL_NEG_GT1 (int64_t)0xBFF0000000000001
+#define DBL_NEG_ONE (int64_t)0xBFF0000000000000
+#define DBL_NEG_LT1 (int64_t)0xBFEFFFFFFFFFFFFF
+#define DBL_NEG_TINY (int64_t)0x800FFFFFFFFFFFFF
+#define DBL_NEG_ZERO (int64_t)0x8000000000000000
+#define DBL_POS_ZERO (int64_t)0x0000000000000000
+#define DBL_POS_TINY (int64_t)0x000FFFFFFFFFFFFF
+#define DBL_POS_LT1 (int64_t)0x3FEFFFFFFFFFFFFF
+#define DBL_POS_ONE (int64_t)0x3FF0000000000000
+#define DBL_POS_GT1 (int64_t)0x3FF0000000000001
+#define DBL_POS_INF (int64_t)0x7FF0000000000000
+#define DBL_QNAN (int64_t)0x7FF8000000000000
+#define DBL_SNAN (int64_t)0x7FF4000000000000
+
+#define NB_TV_FOR_TRIG 4
+#define TV_FOR_TRIG                                                            \
+  {DBL_QNAN,    DBL_QNAN, DBL_SNAN,    DBL_QNAN,                               \
+   DBL_NEG_INF, DBL_QNAN, DBL_POS_INF, DBL_QNAN}
+
+#define NB_TV_FOR_EXP 4
+#define TV_FOR_EXP                                                             \
+  {DBL_QNAN,    DBL_QNAN,     DBL_SNAN,    DBL_QNAN,                           \
+   DBL_NEG_INF, DBL_POS_ZERO, DBL_POS_INF, DBL_POS_INF}
+
+#define NB_TV_FOR_EXPM1 5
+#define TV_FOR_EXPM1                                                           \
+  {DBL_QNAN,    DBL_QNAN,    DBL_SNAN,    DBL_QNAN,     DBL_NEG_INF,           \
+   DBL_NEG_ONE, DBL_POS_INF, DBL_POS_INF, DBL_POS_ZERO, DBL_POS_ZERO}
+
+#define NB_TV_FOR_LOG 8
+#define TV_FOR_LOG                                                             \
+  {DBL_QNAN,     DBL_QNAN,    DBL_SNAN,     DBL_QNAN,                          \
+   DBL_NEG_INF,  DBL_QNAN,    DBL_NEG_GT1,  DBL_QNAN,                          \
+   DBL_NEG_TINY, DBL_QNAN,    DBL_NEG_ZERO, DBL_NEG_INF,                       \
+   DBL_POS_ZERO, DBL_NEG_INF, DBL_POS_INF,  DBL_POS_INF}
+
+#define NB_TV_FOR_LOG1P 7
+#define TV_FOR_LOG1P                                                           \
+  {DBL_QNAN,     DBL_QNAN,     DBL_SNAN,    DBL_QNAN,    DBL_NEG_INF,          \
+   DBL_QNAN,     DBL_NEG_GT1,  DBL_QNAN,    DBL_NEG_ONE, DBL_NEG_INF,          \
+   DBL_POS_ZERO, DBL_POS_ZERO, DBL_POS_INF, DBL_POS_INF}
+
 #define COMMENT(comment)                                                       \
-  { printf("\n=====\t" comment "\n"); }
+  {                                                                            \
+    printf("\n=====\t" comment "\n");                                          \
+  }
 
 // Most common interface: testing on 1 interval
 // for 1-in-1-out unit-stride function
@@ -20,8 +65,14 @@ void report_err_fp64(void (*test_func)(size_t, const double *, double *,
                      long double (*ref_func)(long double), int, double, double,
                      int, double = 1.0);
 
+void report_err_fp64(void (*test_func)(size_t, const double *, double *,
+                                       double *),
+                     long double (*ref_func)(long double), int, const double *,
+                     int, double = 1.0);
+
 void report_err_fp64(void (*test_func)(size_t, const double *, double *),
-                     long double (*ref_func)(long double), const double *, int);
+                     long double (*ref_func)(long double), const double *, int,
+                     double = 1.0);
 
 void report_mixederr_fp64(void (*test_func)(size_t, const double *, double *),
                           long double (*ref_func)(long double), double, double,
@@ -86,6 +137,13 @@ void show_special2_fp64(void (*test_func)(size_t, const double *,
                                           const double *, double *),
                         int, char *);
 
+void test_vectors_fp64(void (*test_func)(size_t, const double *, double *),
+                       int64_t *, int);
+
+void test_vectors2_fp64(void (*test_func)(size_t, const double *,
+                                          const double *, double *),
+                        int64_t *, int, int);
+
 void trig_2pi_reduction(long double, long double *, int64_t *);
 
 long double acospil(long double);
diff --git a/test/src/test_cos.cpp b/test/src/test_cos.cpp
index 2fa86aa..4930d4a 100644
--- a/test/src/test_cos.cpp
+++ b/test/src/test_cos.cpp
@@ -9,12 +9,22 @@
 #include "test_infra.h"
 
 TEST(cos, special) {
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_TRIG)] = TV_FOR_TRIG;
+
+  nb_tv = NB_TV_FOR_TRIG;
+
+  test_vectors_fp64(rvvlm_cos, tv_in_out, nb_tv);
+}
+
+TEST(cos, near_NPiby2) {
+#include "near_NPiby2_tbl.h"
   unsigned long nb_tests;
-  double x_start, x_end;
 
   COMMENT("cos: current chosen algorithm; reduced argument in FP64 only")
 
-  show_special_fp64(rvvlm_cos, "Special Value handling of this function");
+  nb_tests = (NB_TEST_ARGS);
+  report_err_fp64(rvvlm_cos, cosl, dbl_near_NPiby2_tbl, nb_tests);
 }
 
 TEST(cos, small_args) {
diff --git a/test/src/test_exp.cpp b/test/src/test_exp.cpp
index d97a724..517a2f6 100644
--- a/test/src/test_exp.cpp
+++ b/test/src/test_exp.cpp
@@ -8,14 +8,21 @@
 #include "rvvlm.h"
 #include "test_infra.h"
 
+TEST(exp, special) {
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_EXP)] = TV_FOR_EXP;
+
+  nb_tv = NB_TV_FOR_EXP;
+
+  test_vectors_fp64(rvvlm_exp, tv_in_out, nb_tv);
+}
+
 TEST(exp, small_args) {
   unsigned long nb_tests;
   double x_start, x_end;
 
   COMMENT("exp: current chosen algorithm; reduced argument in FP64 only")
 
-  show_special_fp64(rvvlm_exp, "Special Value handling of this function");
-
   x_start = -0.34;
   x_end = 0.34;
   nb_tests = 100000;
diff --git a/test/src/test_exp10.cpp b/test/src/test_exp10.cpp
index ffdc649..88b82e0 100644
--- a/test/src/test_exp10.cpp
+++ b/test/src/test_exp10.cpp
@@ -8,14 +8,21 @@
 #include "rvvlm.h"
 #include "test_infra.h"
 
+TEST(exp10, special) {
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_EXP)] = TV_FOR_EXP;
+
+  nb_tv = NB_TV_FOR_EXP;
+
+  test_vectors_fp64(rvvlm_exp10, tv_in_out, nb_tv);
+}
+
 TEST(exp10, small_args) {
   unsigned long nb_tests;
   double x_start, x_end;
 
   COMMENT("exp10: current chosen algorithm; reduced argument in FP64 only")
 
-  show_special_fp64(rvvlm_exp10, "Special Value handling of this function");
-
   x_start = -0.34;
   x_end = 0.34;
   nb_tests = 30000;
diff --git a/test/src/test_exp2.cpp b/test/src/test_exp2.cpp
index 9ff2919..1851ae9 100644
--- a/test/src/test_exp2.cpp
+++ b/test/src/test_exp2.cpp
@@ -8,14 +8,21 @@
 #include "rvvlm.h"
 #include "test_infra.h"
 
+TEST(exp2, special) {
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_EXP)] = TV_FOR_EXP;
+
+  nb_tv = NB_TV_FOR_EXP;
+
+  test_vectors_fp64(rvvlm_exp2, tv_in_out, nb_tv);
+}
+
 TEST(exp2, small_args) {
   unsigned long nb_tests;
   double x_start, x_end;
 
   COMMENT("exp2: current chosen algorithm; reduced argument in FP64 only")
 
-  show_special_fp64(rvvlm_exp2, "Special Value handling of this function");
-
   x_start = -0.34;
   x_end = 0.34;
   nb_tests = 80000;
diff --git a/test/src/test_expm1.cpp b/test/src/test_expm1.cpp
index 841c12d..e785376 100644
--- a/test/src/test_expm1.cpp
+++ b/test/src/test_expm1.cpp
@@ -8,14 +8,21 @@
 #include "rvvlm.h"
 #include "test_infra.h"
 
+TEST(expm1, special) {
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_EXPM1)] = TV_FOR_EXPM1;
+
+  nb_tv = NB_TV_FOR_EXPM1;
+
+  test_vectors_fp64(rvvlm_expm1, tv_in_out, nb_tv);
+}
+
 TEST(expm1, small_args) {
   unsigned long nb_tests;
   double x_start, x_end;
 
   COMMENT("expm1: current chosen algorithm; reduced argument in FP64 only")
 
-  show_special_fp64(rvvlm_expm1, "Special Value handling of this function");
-
   x_start = -0.01;
   x_end = 0.01;
   nb_tests = 30000;
diff --git a/test/src/test_infra.cpp b/test/src/test_infra.cpp
index 8ce42aa..f4a7c37 100644
--- a/test/src/test_infra.cpp
+++ b/test/src/test_infra.cpp
@@ -33,6 +33,7 @@ union sui64_fp64 {
 #define MIN(x, y) ((x) < (y) ? (x) : (y))
 
 #define N_PTS_MAX 100000
+#define N_TV_MAX 200
 
 #define VERBOSE 0
 
@@ -84,10 +85,11 @@ union sui64_fp64 fp64_special_values[N_SPECIALS] = {
 
 void report_err_fp64(void (*test_func)(size_t, const double *, double *),
                      long double (*ref_func)(long double),
-                     const double *test_args, int nb_test_args) {
+                     const double *test_args, int nb_test_args,
+                     double threshold) {
 
   long double y_ref;
-  double *x, *y, delta;
+  double *y;
   double abs_err, rel_err, ulp_err;
   double max_abs_err, max_rel_err, max_ulp_err;
 
@@ -135,6 +137,8 @@ void report_err_fp64(void (*test_func)(size_t, const double *, double *),
   }
   printf("Maximum observed ULP      error is %3.3lf\n", max_ulp_err);
 
+  EXPECT_LT((double)max_ulp_err, threshold);
+
   free(y);
 }
 
@@ -214,6 +218,72 @@ void report_err_fp64(void (*test_func)(size_t, const double *, double *),
   free(y);
 }
 
+void report_err_fp64(void (*test_func)(size_t, const double *, double *,
+                                       double *),
+                     long double (*ref_func)(long double), int which_output,
+                     const double *test_args, int nb_test_args,
+                     double threshold) {
+  // Checks one output of a 1-in-2-out function on an explicit argument list.
+  long double y_ref;
+  double *y, *z;
+  long double abs_err, rel_err, ulp_err;
+  long double max_abs_err, max_rel_err, max_ulp_err;
+
+  y = (double *)malloc(nb_test_args * sizeof(double));
+  z = (double *)malloc(nb_test_args * sizeof(double));
+
+  max_abs_err = 0.0;
+  max_rel_err = 0.0;
+  max_ulp_err = 0.0;
+
+  // y receives the first output when which_output == 1, else the second
+  if (which_output == 1) {
+    test_func((size_t)nb_test_args, test_args, y, z);
+  } else {
+    test_func((size_t)nb_test_args, test_args, z, y);
+  }
+
+  // for each argument, compute abs/rel/ULP error vs ref_func and track maxima
+  for (int j = 0; j < nb_test_args; j++) {
+    y_ref = ref_func((long double)test_args[j]);
+    abs_err = (long double)y[j] - y_ref;
+    abs_err = ABS(abs_err);
+    if (ABS((double)y_ref) > 0.0) {
+      rel_err = abs_err / ABS((double)y_ref);
+    } else {
+      rel_err = abs_err / 0x1.0p-1074; // reference is 0: scale by 2^-1074
+    }
+    ulp_err = abs_err / ulp_64((double)y_ref);
+
+    max_abs_err = MAX(max_abs_err, abs_err);
+    max_rel_err = MAX(max_rel_err, rel_err);
+    max_ulp_err = MAX(max_ulp_err, ulp_err);
+
+    if (VERBOSE) {
+      union sui64_fp64 xxx, yyy;
+      xxx.f = test_args[j];
+      yyy.f = y[j];
+      printf("--input %24.17le, 0x%016lx, output %24.17le, 0x%016lx \n", xxx.f,
+             xxx.ui, yyy.f, yyy.ui);
+      printf("  reference %24.17le\n\n", (double)y_ref);
+    }
+  }
+  printf("----------------------------\n");
+  printf("Tested %d special test arguments\n", nb_test_args);
+  printf("Maximum observed absolute error is %8.3Le\n", max_abs_err);
+  printf("Maximum observed relative error is %8.3Le\n", max_rel_err);
+  if (max_rel_err > 0.0) {
+    printf("                          which is 2^(%3.3Lf)\n",
+           log2(max_rel_err));
+  }
+  printf("Maximum observed ULP      error is %3.3Lf\n", max_ulp_err);
+  // record a test failure unless the max ULP error is below threshold
+  EXPECT_LT(max_ulp_err, threshold);
+
+  free(y);
+  free(z);
+}
+
 void report_err_fp64(void (*test_func)(size_t, const double *, double *,
                                        double *),
                      long double (*ref_func)(long double), int which_output,
@@ -1154,6 +1224,47 @@ void report_err2_fp64(void (*test_func)(size_t, const double *, size_t,
   free(z);
 }
 
+void test_vectors_fp64(void (*test_func)(size_t, const double *, double *),
+                       int64_t *tv_in_out, int nb_tv) {
+  // tv_in_out: nb_tv pairs of {input bits, expected output bits}
+  ASSERT_TRUE(nb_tv >= 0 && nb_tv <= N_TV_MAX); // must fit x[] and y[]
+  double x[N_TV_MAX], y[N_TV_MAX];
+  union sui64_fp64 xxx, yyy;
+  for (int i = 0; i < nb_tv; i++) {
+    xxx.si = tv_in_out[2 * i];
+    x[i] = xxx.f;
+  }
+  test_func((size_t)nb_tv, x, y);
+  for (int i = 0; i < nb_tv; i++) {
+    yyy.f = y[i];
+    EXPECT_EQ(yyy.si, tv_in_out[2 * i + 1]);
+  }
+}
+
+void test_vectors2_fp64(void (*test_func)(size_t, const double *,
+                                          const double *, double *),
+                        int64_t *tv_in_out, int nb_tv, int swap_xy) {
+  // tv_in_out: nb_tv triples of {x bits, y bits, expected result bits}
+  ASSERT_TRUE(nb_tv >= 0 && nb_tv <= N_TV_MAX); // must fit x[], y[], z[]
+  double x[N_TV_MAX], y[N_TV_MAX], z[N_TV_MAX];
+  union sui64_fp64 xxx, yyy, zzz;
+  for (int i = 0; i < nb_tv; i++) {
+    xxx.si = tv_in_out[3 * i];
+    x[i] = xxx.f;
+    yyy.si = tv_in_out[3 * i + 1];
+    y[i] = yyy.f;
+  }
+  if (swap_xy == 0) {
+    test_func((size_t)nb_tv, x, y, z);
+  } else {
+    test_func((size_t)nb_tv, y, x, z);
+  }
+  for (int i = 0; i < nb_tv; i++) {
+    zzz.f = z[i];
+    EXPECT_EQ(zzz.si, tv_in_out[3 * i + 2]);
+  }
+}
+
 void show_special_fp64(void (*test_func)(size_t, const double *, double *),
                        const char *title) {
 
diff --git a/test/src/test_log.cpp b/test/src/test_log.cpp
index 9ec687b..3eb02be 100644
--- a/test/src/test_log.cpp
+++ b/test/src/test_log.cpp
@@ -9,12 +9,12 @@
 #include "test_infra.h"
 
 TEST(log, special) {
-  unsigned long nb_tests;
-  double x_start, x_end;
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_LOG)] = TV_FOR_LOG;
 
-  COMMENT("log: current chosen algorithm; reduced argument in FP64 only")
+  nb_tv = NB_TV_FOR_LOG;
 
-  show_special_fp64(rvvlm_log, "Special Value handling of this function");
+  test_vectors_fp64(rvvlm_log, tv_in_out, nb_tv);
 }
 
 TEST(log, around_1) {
diff --git a/test/src/test_log10.cpp b/test/src/test_log10.cpp
index 938b7c6..e696fd2 100644
--- a/test/src/test_log10.cpp
+++ b/test/src/test_log10.cpp
@@ -9,12 +9,12 @@
 #include "test_infra.h"
 
 TEST(log10, special) {
-  unsigned long nb_tests;
-  double x_start, x_end;
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_LOG)] = TV_FOR_LOG;
 
-  COMMENT("log10: current chosen algorithm; reduced argument in FP64 only")
+  nb_tv = NB_TV_FOR_LOG;
 
-  show_special_fp64(rvvlm_log10, "Special Value handling of this function");
+  test_vectors_fp64(rvvlm_log10, tv_in_out, nb_tv);
 }
 
 TEST(log10, around_1) {
diff --git a/test/src/test_log1p.cpp b/test/src/test_log1p.cpp
index cbf3bcf..0ced7fd 100644
--- a/test/src/test_log1p.cpp
+++ b/test/src/test_log1p.cpp
@@ -9,12 +9,12 @@
 #include "test_infra.h"
 
 TEST(log1p, special) {
-  unsigned long nb_tests;
-  double x_start, x_end;
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_LOG1P)] = TV_FOR_LOG1P;
 
-  COMMENT("log1p: current chosen algorithm; reduced argument in FP64 only")
+  nb_tv = NB_TV_FOR_LOG1P;
 
-  show_special_fp64(rvvlm_log1p, "Special Value handling of this function");
+  test_vectors_fp64(rvvlm_log1p, tv_in_out, nb_tv);
 }
 
 TEST(log1p, small_args) {
diff --git a/test/src/test_log2.cpp b/test/src/test_log2.cpp
index 7c39aa7..6cc57bc 100644
--- a/test/src/test_log2.cpp
+++ b/test/src/test_log2.cpp
@@ -9,12 +9,12 @@
 #include "test_infra.h"
 
 TEST(log2, special) {
-  unsigned long nb_tests;
-  double x_start, x_end;
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_LOG)] = TV_FOR_LOG;
 
-  COMMENT("log2: current chosen algorithm; reduced argument in FP64 only")
+  nb_tv = NB_TV_FOR_LOG;
 
-  show_special_fp64(rvvlm_log2, "Special Value handling of this function");
+  test_vectors_fp64(rvvlm_log2, tv_in_out, nb_tv);
 }
 
 TEST(log2, around_1) {
diff --git a/test/src/test_pow.cpp b/test/src/test_pow.cpp
index f8c4b2f..4aac6c5 100644
--- a/test/src/test_pow.cpp
+++ b/test/src/test_pow.cpp
@@ -9,17 +9,54 @@
 #include "rvvlm.h"
 #include "test_infra.h"
 
-TEST(pow, special) {
-  unsigned long nb_tests, nb_pts_x, nb_pts_y;
-  double x_start, x_end, y_start, y_end;
-  double target_start, target_end, delta, target;
-  unsigned long nb_targets;
-  bool swap_xy = 0;
-
-  COMMENT("pow: current chosen algorithm; reduced argument in FP64 only")
+#define NB_TV_POW 60
+#define DBL_NEG_171 (int64_t)0xc065600000000000
+#define DBL_NEG_888 (int64_t)0xc08bc00000000000
+#define DBL_NEG_NONINT (int64_t)0xc052466666666666
+#define DBL_POS_171 (int64_t)0x4065600000000000
+#define DBL_POS_888 (int64_t)0x408bc00000000000
 
-  show_special2_fp64(rvvlm_pow, swap_xy,
-                     "Special Value handling of this function");
+TEST(pow, special) {
+  int nb_tv = NB_TV_POW;
+  int swap_xy = 0;
+  int64_t pow_tv[3 * NB_TV_POW] = {
+      DBL_POS_ZERO,   DBL_NEG_171,  DBL_POS_INF,  DBL_NEG_ZERO, DBL_NEG_171,
+      DBL_NEG_INF,    DBL_POS_ZERO, DBL_NEG_888,  DBL_POS_INF,  DBL_NEG_ZERO,
+      DBL_NEG_NONINT, DBL_POS_INF,  DBL_POS_ZERO, DBL_NEG_INF,  DBL_POS_INF,
+      DBL_NEG_ZERO,   DBL_NEG_INF,  DBL_POS_INF,  DBL_POS_ZERO, DBL_NEG_INF,
+      DBL_POS_INF,    DBL_NEG_ZERO, DBL_NEG_INF,  DBL_POS_INF,  DBL_NEG_ONE,
+      DBL_POS_INF,    DBL_POS_ONE,  DBL_NEG_ONE,  DBL_NEG_INF,  DBL_POS_ONE,
+      DBL_POS_ONE,    DBL_POS_GT1,  DBL_POS_ONE,  DBL_POS_ONE,  DBL_POS_INF,
+      DBL_POS_ONE,    DBL_POS_ONE,  DBL_NEG_INF,  DBL_POS_ONE,  DBL_POS_ONE,
+      DBL_QNAN,       DBL_POS_ONE,  DBL_POS_ONE,  DBL_SNAN,     DBL_POS_ONE,
+      DBL_POS_INF,    DBL_POS_ZERO, DBL_POS_ONE,  DBL_NEG_INF,  DBL_POS_ZERO,
+      DBL_POS_ONE,    DBL_POS_GT1,  DBL_POS_ZERO, DBL_POS_ONE,  DBL_POS_INF,
+      DBL_POS_ZERO,   DBL_POS_ONE,  DBL_NEG_INF,  DBL_POS_ZERO, DBL_POS_ONE,
+      DBL_QNAN,       DBL_POS_ZERO, DBL_POS_ONE,  DBL_SNAN,     DBL_POS_ZERO,
+      DBL_POS_ONE,    DBL_POS_INF,  DBL_NEG_ZERO, DBL_POS_ONE,  DBL_NEG_INF,
+      DBL_NEG_ZERO,   DBL_POS_ONE,  DBL_POS_GT1,  DBL_NEG_ZERO, DBL_POS_ONE,
+      DBL_POS_INF,    DBL_NEG_ZERO, DBL_POS_ONE,  DBL_NEG_INF,  DBL_NEG_ZERO,
+      DBL_POS_ONE,    DBL_QNAN,     DBL_NEG_ZERO, DBL_POS_ONE,  DBL_SNAN,
+      DBL_NEG_ZERO,   DBL_POS_ONE,  DBL_NEG_GT1,  DBL_POS_GT1,  DBL_QNAN,
+      DBL_NEG_LT1,    DBL_NEG_INF,  DBL_POS_INF,  DBL_POS_LT1,  DBL_NEG_INF,
+      DBL_POS_INF,    DBL_NEG_GT1,  DBL_NEG_INF,  DBL_POS_ZERO, DBL_POS_GT1,
+      DBL_NEG_INF,    DBL_POS_ZERO, DBL_NEG_LT1,  DBL_POS_INF,  DBL_POS_ZERO,
+      DBL_POS_LT1,    DBL_POS_INF,  DBL_POS_ZERO, DBL_NEG_GT1,  DBL_POS_INF,
+      DBL_POS_INF,    DBL_POS_GT1,  DBL_POS_INF,  DBL_POS_INF,  DBL_NEG_INF,
+      DBL_NEG_171,    DBL_NEG_ZERO, DBL_NEG_INF,  DBL_NEG_888,  DBL_POS_ZERO,
+      DBL_NEG_INF,    DBL_NEG_GT1,  DBL_POS_ZERO, DBL_NEG_INF,  DBL_POS_171,
+      DBL_NEG_INF,    DBL_NEG_INF,  DBL_POS_888,  DBL_POS_INF,  DBL_NEG_INF,
+      DBL_POS_GT1,    DBL_POS_INF,  DBL_POS_INF,  DBL_NEG_171,  DBL_POS_ZERO,
+      DBL_POS_INF,    DBL_NEG_888,  DBL_POS_ZERO, DBL_POS_INF,  DBL_NEG_GT1,
+      DBL_POS_ZERO,   DBL_QNAN,     DBL_POS_LT1,  DBL_QNAN,     DBL_SNAN,
+      DBL_POS_GT1,    DBL_QNAN,     DBL_QNAN,     DBL_NEG_LT1,  DBL_QNAN,
+      DBL_SNAN,       DBL_POS_LT1,  DBL_QNAN,     DBL_QNAN,     DBL_POS_TINY,
+      DBL_QNAN,       DBL_SNAN,     DBL_NEG_TINY, DBL_QNAN,     DBL_POS_LT1,
+      DBL_QNAN,       DBL_QNAN,     DBL_POS_GT1,  DBL_SNAN,     DBL_QNAN,
+      DBL_NEG_LT1,    DBL_QNAN,     DBL_QNAN,     DBL_POS_LT1,  DBL_SNAN,
+      DBL_QNAN,       DBL_POS_TINY, DBL_QNAN,     DBL_QNAN,     DBL_NEG_TINY,
+      DBL_SNAN,       DBL_QNAN,     DBL_POS_GT1,  DBL_POS_INF,  DBL_POS_INF};
+  test_vectors2_fp64(rvvlm_pow, pow_tv, nb_tv, swap_xy);
 }
 
 TEST(pow, medium_args) {
diff --git a/test/src/test_sin.cpp b/test/src/test_sin.cpp
index 6954d44..f8b1847 100644
--- a/test/src/test_sin.cpp
+++ b/test/src/test_sin.cpp
@@ -9,12 +9,12 @@
 #include "test_infra.h"
 
 TEST(sin, special) {
-  unsigned long nb_tests;
-  double x_start, x_end;
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_TRIG)] = TV_FOR_TRIG;
 
-  COMMENT("sin: current chosen algorithm; reduced argument in FP64 only")
+  nb_tv = NB_TV_FOR_TRIG;
 
-  show_special_fp64(rvvlm_sin, "Special Value handling of this function");
+  test_vectors_fp64(rvvlm_sin, tv_in_out, nb_tv);
 }
 
 TEST(sin, small_args) {
@@ -34,6 +34,17 @@ TEST(sin, small_args) {
   report_err_fp64(rvvlm_sin, sinl, x_start, x_end, nb_tests);
 }
 
+TEST(sin, near_NPiby2) {
+#include "near_NPiby2_tbl.h"
+
+  unsigned long nb_tests;
+
+  COMMENT("sin: current chosen algorithm; reduced argument in FP64 only")
+
+  nb_tests = (NB_TEST_ARGS);
+  report_err_fp64(rvvlm_sin, sinl, dbl_near_NPiby2_tbl, nb_tests);
+}
+
 TEST(sin, medium_args) {
   unsigned long nb_tests;
   double x_start, x_end;
diff --git a/test/src/test_sincos.cpp b/test/src/test_sincos.cpp
index 3ca7b0f..1c1f873 100644
--- a/test/src/test_sincos.cpp
+++ b/test/src/test_sincos.cpp
@@ -57,3 +57,14 @@ TEST(sincos, large_args) {
   report_err_fp64(rvvlm_sincos, sinl, 1, x_start, x_end, nb_tests);
   report_err_fp64(rvvlm_sincos, cosl, 2, x_start, x_end, nb_tests);
 }
+
+TEST(sincos, near_NPiby2) {
+#include "near_NPiby2_tbl.h"
+  unsigned long nb_tests;
+
+  COMMENT("sincos: current chosen algorithm; reduced argument in FP64 only")
+  // third argument picks the output to check: 1 vs sinl, 2 vs cosl
+  nb_tests = (NB_TEST_ARGS);
+  report_err_fp64(rvvlm_sincos, sinl, 1, dbl_near_NPiby2_tbl, nb_tests);
+  report_err_fp64(rvvlm_sincos, cosl, 2, dbl_near_NPiby2_tbl, nb_tests);
+}
diff --git a/test/src/test_tan.cpp b/test/src/test_tan.cpp
index e029114..b432623 100644
--- a/test/src/test_tan.cpp
+++ b/test/src/test_tan.cpp
@@ -9,12 +9,22 @@
 #include "test_infra.h"
 
 TEST(tan, special) {
+  int nb_tv;
+  int64_t tv_in_out[2 * (NB_TV_FOR_TRIG)] = TV_FOR_TRIG;
+
+  nb_tv = NB_TV_FOR_TRIG;
+
+  test_vectors_fp64(rvvlm_tan, tv_in_out, nb_tv);
+}
+
+TEST(tan, near_NPiby2) {
+#include "near_NPiby2_tbl.h"
   unsigned long nb_tests;
-  double x_start, x_end;
 
   COMMENT("tan: current chosen algorithm; reduced argument in FP64 only")
 
-  show_special_fp64(rvvlm_tan, "Special Value handling of this function");
+  nb_tests = (NB_TEST_ARGS);
+  report_err_fp64(rvvlm_tan, tanl, dbl_near_NPiby2_tbl, nb_tests);
 }
 
 TEST(tan, small_args) {

From 270e4de84150c4949cbd546b9d9f61f18fd9c71b Mon Sep 17 00:00:00 2001
From: Keeran Rothenfusser <keeranr@rivosinc.com>
Date: Wed, 6 Nov 2024 17:18:14 +0100
Subject: [PATCH 2/2] Turn off clang-format for small tables

---
 test/include/test_infra.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/include/test_infra.h b/test/include/test_infra.h
index 0ed300e..6904e53 100644
--- a/test/include/test_infra.h
+++ b/test/include/test_infra.h
@@ -21,6 +21,7 @@
 #define DBL_QNAN (int64_t)0x7FF8000000000000
 #define DBL_SNAN (int64_t)0x7FF4000000000000
 
+// clang-format off
 #define NB_TV_FOR_TRIG 4
 #define TV_FOR_TRIG                                                            \
   {DBL_QNAN,    DBL_QNAN, DBL_SNAN,    DBL_QNAN,                               \
@@ -53,6 +54,7 @@
   {                                                                            \
     printf("\n=====\t" comment "\n");                                          \
   }
+// clang-format on
 
 // Most common interface: testing on 1 interval
 // for 1-in-1-out unit-stride function