From 47922417a7b9a7e3cd40625871810d1f1983aedd Mon Sep 17 00:00:00 2001 From: PingTakPeterTang Date: Thu, 4 Jan 2024 19:56:49 +0800 Subject: [PATCH] added inverse hyperbolic functions acosh/asinh/atanh --- CMakeLists.txt | 6 ++ include/rvvlm.h | 69 +++++++++++++-- include/rvvlm_acoshD.inc.h | 120 +++++++++++++++++++++++++ include/rvvlm_asinhcoshD.inc.h | 147 +++++++++++++++++++++++++++++++ include/rvvlm_atanhD.inc.h | 139 +++++++++++++++++++++++++++++ include/rvvlm_invhyperD.h | 154 +++++++++++++++++++++++++++++++++ src/rvvlm_acoshD.c | 16 ++++ src/rvvlm_acoshDI.c | 16 ++++ src/rvvlm_asinhD.c | 16 ++++ src/rvvlm_asinhDI.c | 16 ++++ src/rvvlm_atanhD.c | 14 +++ src/rvvlm_atanhDI.c | 14 +++ test/CMakeLists.txt | 14 ++- test/src/test_acosh.cpp | 43 +++++++++ test/src/test_acoshI.cpp | 29 +++++++ test/src/test_asinh.cpp | 63 ++++++++++++++ test/src/test_asinhI.cpp | 29 +++++++ test/src/test_atanh.cpp | 63 ++++++++++++++ test/src/test_atanhI.cpp | 29 +++++++ 19 files changed, 988 insertions(+), 9 deletions(-) create mode 100644 include/rvvlm_acoshD.inc.h create mode 100644 include/rvvlm_asinhcoshD.inc.h create mode 100644 include/rvvlm_atanhD.inc.h create mode 100644 include/rvvlm_invhyperD.h create mode 100644 src/rvvlm_acoshD.c create mode 100644 src/rvvlm_acoshDI.c create mode 100644 src/rvvlm_asinhD.c create mode 100644 src/rvvlm_asinhDI.c create mode 100644 src/rvvlm_atanhD.c create mode 100644 src/rvvlm_atanhDI.c create mode 100644 test/src/test_acosh.cpp create mode 100644 test/src/test_acoshI.cpp create mode 100644 test/src/test_asinh.cpp create mode 100644 test/src/test_asinhI.cpp create mode 100644 test/src/test_atanh.cpp create mode 100644 test/src/test_atanhI.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 531ff3c..de929db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,12 @@ set(PROJECT_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atan2DI.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atan2piD.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atan2piDI.c 
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_acoshD.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_acoshDI.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_asinhD.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_asinhDI.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atanhD.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atanhDI.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_expD_tbl.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_expD.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_expDI.c diff --git a/include/rvvlm.h b/include/rvvlm.h index 3fb8986..336e837 100644 --- a/include/rvvlm.h +++ b/include/rvvlm.h @@ -67,9 +67,9 @@ union sui64_fp64 { #define FAST2SUM(X, Y, S, s, vlen) \ do { \ - S = __riscv_vfadd((X), (Y), (vlen)); \ - s = __riscv_vfsub((X), (S), (vlen)); \ - s = __riscv_vfadd((s), (Y), (vlen)); \ + (S) = __riscv_vfadd((X), (Y), (vlen)); \ + (s) = __riscv_vfsub((X), (S), (vlen)); \ + (s) = __riscv_vfadd((s), (Y), (vlen)); \ } while (0) #define POS2SUM(X, Y, S, s, vlen) \ @@ -92,8 +92,8 @@ union sui64_fp64 { #define PROD_X1Y1(x, y, prod_hi, prod_lo, vlen) \ do { \ - prod_hi = __riscv_vfmul((x), (y), (vlen)); \ - prod_lo = __riscv_vfmsub((x), (y), (prod_hi), (vlen)); \ + (prod_hi) = __riscv_vfmul((x), (y), (vlen)); \ + (prod_lo) = __riscv_vfmsub((x), (y), (prod_hi), (vlen)); \ } while (0) #define DIV_N1D2(numer, denom, delta_d, Q, q, vlen) \ @@ -115,6 +115,32 @@ union sui64_fp64 { (Q) = __riscv_vfadd((Q), _q, (vlen)); \ } while (0) +#define DIV2_N2D2(numer, delta_n, denom, delta_d, Q, delta_Q, vlen) \ + do { \ + VFLOAT _q; \ + (Q) = __riscv_vfdiv((numer), (denom), (vlen)); \ + _q = __riscv_vfnmsub((Q), (denom), (numer), (vlen)); \ + _q = __riscv_vfnmsac(_q, (Q), (delta_d), (vlen)); \ + _q = __riscv_vfadd(_q, (delta_n), (vlen)); \ + (delta_Q) = __riscv_vfmul(_q, __riscv_vfrec7((denom), (vlen)), (vlen)); \ + } while (0) + +#define SQRT2_X2(x, delta_x, r, delta_r, vlen) \ + do { \ + VFLOAT xx = __riscv_vfadd((x), (delta_x), (vlen)); \ + VBOOL x_eq_0 = __riscv_vmfeq(xx, fp_posZero, (vlen)); \ + xx = 
__riscv_vfmerge(xx, fp_posOne, x_eq_0, (vlen)); \ + (r) = __riscv_vfsqrt(xx, (vlen)); \ + (delta_r) = __riscv_vfnmsub((r), (r), (x), (vlen)); \ + (delta_r) = __riscv_vfadd((delta_r), (delta_x), (vlen)); \ + (delta_r) = __riscv_vfmul((delta_r), __riscv_vfrec7(xx, (vlen)), (vlen)); \ + /* (delta_r) = __riscv_vfdiv((delta_r), xx, (vlen)); */ \ + (delta_r) = __riscv_vfmul((delta_r), 0x1.0p-1, (vlen)); \ + (delta_r) = __riscv_vfmul((delta_r), (r), (vlen)); \ + (r) = __riscv_vfmerge((r), fp_posZero, x_eq_0, (vlen)); \ + (delta_r) = __riscv_vfmerge((delta_r), fp_posZero, x_eq_0, (vlen)); \ + } while (0) + #define IDENTIFY(vclass, stencil, identity_mask, vlen) \ identity_mask = \ __riscv_vmsgtu(__riscv_vand((vclass), (stencil), (vlen)), 0, (vlen)); @@ -189,6 +215,27 @@ union sui64_fp64 { #define RVVLM_ATAN2PIDI_VSET_CONFIG "rvvlm_fp64m2.h" #define RVVLM_ATAN2PIDI_FIXEDPT rvvlm_atan2piI +// FP64 acosh function configuration +#define RVVLM_ACOSHD_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ACOSHD_STD rvvlm_acosh + +#define RVVLM_ACOSHDI_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ACOSHDI_STD rvvlm_acoshI + +// FP64 asinh function configuration +#define RVVLM_ASINHD_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ASINHD_STD rvvlm_asinh + +#define RVVLM_ASINHDI_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ASINHDI_STD rvvlm_asinhI + +// FP64 atanh function configuration +#define RVVLM_ATANHD_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ATANHD_MIXED rvvlm_atanh + +#define RVVLM_ATANHDI_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ATANHDI_MIXED rvvlm_atanhI + // FP64 exp function configuration #define RVVLM_EXPD_VSET_CONFIG "rvvlm_fp64m4.h" #define RVVLM_EXPD_STD rvvlm_expD_std @@ -381,6 +428,18 @@ void RVVLM_ATAN2PIDI_FIXEDPT(size_t xy_len, const double *y, size_t stride_y, const double *x, size_t stride_x, double *z, size_t stride_z); +void RVVLM_ACOSHD_STD(size_t x_len, const double *x, double *y); +void RVVLM_ACOSHDI_STD(size_t x_len, const double *x, size_t stride_x, + double *y, 
size_t stride_y); + +void RVVLM_ASINHD_STD(size_t x_len, const double *x, double *y); +void RVVLM_ASINHDI_STD(size_t x_len, const double *x, size_t stride_x, + double *y, size_t stride_y); + +void RVVLM_ATANHD_MIXED(size_t x_len, const double *x, double *y); +void RVVLM_ATANHDI_MIXED(size_t x_len, const double *x, size_t stride_x, + double *y, size_t stride_y); + void RVVLM_EXPD_STD(size_t x_len, const double *x, double *y); void RVVLM_EXPDI_STD(size_t x_len, const double *x, size_t stride_x, double *y, size_t stride_y); diff --git a/include/rvvlm_acoshD.inc.h b/include/rvvlm_acoshD.inc.h new file mode 100644 index 0000000..28f8a65 --- /dev/null +++ b/include/rvvlm_acoshD.inc.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "rvvlm_invhyperD.h" + +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ACOSHD_STD +#else +#define F_VER1 RVVLM_ACOSHDI_STD +#endif + +#include + +// Acosh(x) is defined for x >= 1 by the formula log(x + sqrt(x*x - 1)) +// and for the log function log(2^n z), we uses the expansion in terms of atanh +// n log(2) + 2 atanh((z-1)/(z+1)) +// This algorithm obtains this scale factor 2^n from the input x, and computes +// the expression x' + sqrt(x'*x' - 2^(-2n)) thus avoiding possible overflow or +// excess computation such as computing sqrt(x*x - 1) by x * sqrt(1 - 1/(x*x)) +// which needs a division on top of a sqrt. 
+void F_VER1(API) { + size_t vlen; + VFLOAT vx, vx_orig, vy, vy_special; + VBOOL special_args; + + SET_ROUNDTONEAREST; + // stripmining over input arguments + for (; _inarg_n > 0; _inarg_n -= vlen) { + vlen = VSET(_inarg_n); + vx = VFLOAD_INARG1(vlen); +#if defined(COMPILE_FOR_ASINH) + vx_orig = vx; +#endif + +#if defined(COMPILE_FOR_ACOSH) + // Handle Inf and NaN and input <= 1.0 + EXCEPTION_HANDLING_ACOSH(vx, special_args, vy_special, vlen); +#else + // Handle Inf and NaN and |input} < 2^(-30) + EXCEPTION_HANDLING_ASINH(vx, special_args, vy_special, vlen); + vx = __riscv_vfsgnj(vx, fp_posOne, vlen); +#endif + + // Need to scale x so that x + sqrt(x*x +/- 1) doesn't overflow + // Since x >= 1, we scale x down by 2^(-550) if x >= 2^500 and set 1 to 0 + VINT n; + VFLOAT u; + SCALE_X(vx, n, u, vlen); + // n is 0 or 500; and u is +/-1.0 or 0.0 + + // sqrt(x*x + u) extra precisely + VFLOAT A, a; +#if defined(COMPILE_FOR_ACOSH) + XSQ_PLUS_U_ACOSH(vx, u, A, a, vlen); +#else + XSQ_PLUS_U_ASINH(vx, u, A, a, vlen); +#endif + // A + a is x*x + u + + VFLOAT B, b; + SQRT2_X2(A, a, B, b, vlen); + // B + b is sqrt(x*x + u) to about 7 extra bits + + // x dominants B for acosh + VFLOAT S, s; +#if defined(COMPILE_FOR_ACOSH) + FAST2SUM(vx, B, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#else + FAST2SUM(B, vx, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#endif + + // x + sqrt(x*x + u) is accurately represented as S + s + // We first scale S, s so that it falls roughly in [1/rt2, rt2] + SCALE_4_LOG(S, s, n, vlen); + + // log(x + sqrt(x*x + u)) = n * log(2) + log(y); y = S + s + // since log(y) = 2 atanh( (y-1)/(y+1) ) to be approximated + // by t + t^3 * poly(t^2), t = 2 (y-1)/(y+1) + // We now compute the numerator and denominator and its quotient + // to extra precision + VFLOAT numer, delta_numer, denom, delta_denom; + TRANSFORM_2_ATANH(S, s, numer, delta_numer, denom, delta_denom, vlen); + + VFLOAT r_hi, r_lo, r; + DIV2_N2D2(numer, delta_numer, denom, delta_denom, r_hi, 
r_lo, vlen); + r = __riscv_vfadd(r_hi, r_lo, vlen); + + VFLOAT n_flt = __riscv_vfcvt_f(n, vlen); + + VFLOAT poly; + LOG_POLY(r, r_lo, poly, vlen); + // At this point r_hi + poly approximates log(X) + + // Reconstruction: logB(in_arg) = n logB(2) + log(X) * logB(e), computed as + // n*(logB_2_hi + logB_2_lo) + r * (logB_e_hi + logB_e_lo) + poly * + // logB_e_hi It is best to compute n * logB_2_hi + r * logB_e_hi in extra + // precision + + A = __riscv_vfmul(n_flt, LOG2_HI, vlen); + FAST2SUM(A, r_hi, S, s, vlen); + s = __riscv_vfmacc(s, LOG2_LO, n_flt, vlen); + s = __riscv_vfadd(s, poly, vlen); + + vy = __riscv_vfadd(S, s, vlen); +#if defined(COMPILE_FOR_ASINH) + vy = __riscv_vfsgnj(vy, vx_orig, vlen); +#endif + vy = __riscv_vmerge(vy, vy_special, special_args, vlen); + + // copy vy into y and increment addr pointers + VFSTORE_OUTARG1(vy, vlen); + + INCREMENT_INARG1(vlen); + INCREMENT_OUTARG1(vlen); + } + RESTORE_FRM; +} diff --git a/include/rvvlm_asinhcoshD.inc.h b/include/rvvlm_asinhcoshD.inc.h new file mode 100644 index 0000000..a57e971 --- /dev/null +++ b/include/rvvlm_asinhcoshD.inc.h @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "rvvlm_invhyperD.h" + +#if defined(COMPILE_FOR_ACOSH) +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ACOSHD_STD +#else +#define F_VER1 RVVLM_ACOSHDI_STD +#endif +#else +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ASINHD_STD +#else +#define F_VER1 RVVLM_ASINHDI_STD +#endif +#endif + +#include + +// Acosh(x) is defined for x >= 1 by the formula log(x + sqrt(x*x - 1)) +// Asinh(x) is defined for all finite x by the formula log(x + sqrt(x*x + 1)) +// Acosh is always positive, and Asinh(-x) = -Asinh(x). Thus we in general +// work with |x| and restore the sign (if necessary) in the end. 
+// For the log function log(2^n z), we uses the expansion in terms of atanh: +// n log(2) + 2 atanh((z-1)/(z+1)) +// The algorithm here first scales down x by 2^(-550) when |x| >= 2^500. +// And for such large x, both acosh and asinh equals log(2x) to very high +// precision. We safely ignore the +/- 1 when this is the case. +// +// A power 2^n is determined by the value of x + sqrt(x*x +/- 1) so that +// scaling the expression by 2^(-n) transforms it to the range [0.71, 1.42]. +// Log(t) for t in this region is computed by 2 atanh((t-1)/(t+1)) +// More precisely, we use s = 2(t-1)/(t+1) and approximate the function +// 2 atanh(s/2) by s + s^3 * polynomial(s^2). +// The final result is n * log(2) + s + s^3 * polynomial(s^2) +// which is computed with care. +void F_VER1(API) { + size_t vlen; + VFLOAT vx, vx_orig, vy, vy_special; + VBOOL special_args; + + SET_ROUNDTONEAREST; + // stripmining over input arguments + for (; _inarg_n > 0; _inarg_n -= vlen) { + vlen = VSET(_inarg_n); + vx = VFLOAD_INARG1(vlen); +#if defined(COMPILE_FOR_ASINH) + vx_orig = vx; +#endif + +#if defined(COMPILE_FOR_ACOSH) + // Handle Inf and NaN and input <= 1.0 + EXCEPTION_HANDLING_ACOSH(vx, special_args, vy_special, vlen); +#else + // Handle Inf and NaN and |input| < 2^(-30) + EXCEPTION_HANDLING_ASINH(vx, special_args, vy_special, vlen); + vx = __riscv_vfsgnj(vx, fp_posOne, vlen); +#endif + + // Need to scale x so that x + sqrt(x*x +/- 1) doesn't overflow + // We scale x down by 2^(-550) if x >= 2^500 and set the "+/- 1" to 0 + VINT n; + VFLOAT u; + SCALE_X(vx, n, u, vlen); + // n is 0 or 500; and u is +/-1.0 or 0.0 + + // sqrt(x*x + u) extra precisely + VFLOAT A, a; +#if defined(COMPILE_FOR_ACOSH) + XSQ_PLUS_U_ACOSH(vx, u, A, a, vlen); +#else + XSQ_PLUS_U_ASINH(vx, u, A, a, vlen); +#endif + // A + a is x*x + u + + VFLOAT B, b; +#if defined(COMPILE_FOR_ACOSH) + SQRT2_X2(A, a, B, b, vlen); + // B + b is sqrt(x*x + u) to about 7 extra bits +#else + // For asinh, we need the sqrt to 
double-double precision + VFLOAT recip = __riscv_vfrdiv(A, fp_posOne, vlen); + B = __riscv_vfsqrt(A, vlen); + b = __riscv_vfnmsub(B, B, A, vlen); + b = __riscv_vfadd(b, a, vlen); + VFLOAT B_recip = __riscv_vfmul(B, recip, vlen); + b = __riscv_vfmul(b, 0x1.0p-1, vlen); + b = __riscv_vfmul(b, B_recip, vlen); +#endif + + VFLOAT S, s; +#if defined(COMPILE_FOR_ACOSH) + // x dominantes B for acosh + FAST2SUM(vx, B, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#else + // B dominates x for asinh + FAST2SUM(B, vx, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#endif + + // x + sqrt(x*x + u) is accurately represented as S + s + // We first scale S, s by 2^(-n) so that the scaled value + // falls roughly in [1/rt2, rt2] + SCALE_4_LOG(S, s, n, vlen); + + // log(x + sqrt(x*x + u)) = n * log(2) + log(y); y = S + s + // We use log(y) = 2 atanh( (y-1)/(y+1) ) and approximate the latter + // by t + t^3 * poly(t^2), t = 2 (y-1)/(y+1) + + // We now compute the numerator 2(y-1) and denominator y+1 and their + // quotient to extra precision + VFLOAT numer, delta_numer, denom, delta_denom; + TRANSFORM_2_ATANH(S, s, numer, delta_numer, denom, delta_denom, vlen); + + VFLOAT r_hi, r_lo, r; + DIV2_N2D2(numer, delta_numer, denom, delta_denom, r_hi, r_lo, vlen); + r = __riscv_vfadd(r_hi, r_lo, vlen); + + VFLOAT poly; + LOG_POLY(r, r_lo, poly, vlen); + // At this point r_hi + poly approximates log(X) + + // Compose the final result: n * log(2) + r_hi + poly + VFLOAT n_flt = __riscv_vfcvt_f(n, vlen); + A = __riscv_vfmul(n_flt, LOG2_HI, vlen); + FAST2SUM(A, r_hi, S, s, vlen); + s = __riscv_vfmacc(s, LOG2_LO, n_flt, vlen); + s = __riscv_vfadd(s, poly, vlen); + + vy = __riscv_vfadd(S, s, vlen); +#if defined(COMPILE_FOR_ASINH) + vy = __riscv_vfsgnj(vy, vx_orig, vlen); +#endif + vy = __riscv_vmerge(vy, vy_special, special_args, vlen); + + // copy vy into y and increment addr pointers + VFSTORE_OUTARG1(vy, vlen); + + INCREMENT_INARG1(vlen); + INCREMENT_OUTARG1(vlen); + } + RESTORE_FRM; +} diff 
--git a/include/rvvlm_atanhD.inc.h b/include/rvvlm_atanhD.inc.h new file mode 100644 index 0000000..10aeab0 --- /dev/null +++ b/include/rvvlm_atanhD.inc.h @@ -0,0 +1,139 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "rvvlm_invhyperD.h" + +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ATANHD_MIXED +#else +#define F_VER1 RVVLM_ATANHDI_MIXED +#endif + +#include + +// Atanh(x) is defined only for |x| <= 1. As atanh(-x) = -atanh(x), the +// main computation works with |x|. +// For |x| > 1 and x being a sNaN, the invalid signal has to be generated +// together with a returned valued of NaN. For a qNaN input, no signal is +// generated. And atan(+/- 1) yiedls +/- Inf, but a div-by-zero signal has +// to be generated. +// +// For 0 < x < 1, we use the formula atan(x) = (1/2) log( (1+x)/(1-x) ). +// The usual technique is to find a scale s = 2^(-n) so that +// r = s * (1+x)/(1-x) falls roughly in the region [1/sqrt(2), sqrt(2)]. +// Thus the desired result is (1/2)(n * log(2) + log(r)). +// Somewhat ironically, log(r) is usually approximated in terms of atanh, +// as its Taylor series around 0 converges much faster than that of log(r) +// around 1. log(r) = 2 atanh( (r-1)/(r+1) ). +// Hence, atan(x) = (n/2)log(2) + atan([(1+x)-(1-x)/s]/[(1+x)+(1-x)/s]). +// +// This implementation obtains s=2^(-n) using the approximate reciprocal +// instruction rather than computing (1+x)/(1-x) to extra precision. +// It then combines the two transformations into +// atanh( [(1+x) - (1-x)/s] / [(1+x) + (1-x)/s] ) requiring only +// one division, instead of two. +// We further observe that instead of using multiple extra-precise +// simulations to obtain both the numerator and denominator accurately, +// we can use fixed-point computations. 
+// As long as the original input |x| >= 0.248, a scale of 60 allows +// both numerator and denominator to maintain high precision without overflow, +// elminating many double-double like simulations. For |x| < 0.248, the +// core polynomial evaluated at x yields the result. +// +void F_VER1(API) { + size_t vlen; + VFLOAT vx, vx_orig, vy, vy_special; + VBOOL special_args; + + SET_ROUNDTONEAREST; + // stripmining over input arguments + for (; _inarg_n > 0; _inarg_n -= vlen) { + vlen = VSET(_inarg_n); + vx = VFLOAD_INARG1(vlen); + vx_orig = vx; + + // Handle Inf, NaN, |input| >= 1, and |input| < 2^(-30) + EXCEPTION_HANDLING_ATANH(vx, special_args, vy_special, vlen); + vx = __riscv_vfsgnj(vx, fp_posOne, vlen); + + // At this point vx are positive number, either 0, or 2^(-30) <= x < 1. + + // Get n so that 2^(-n) * (1+x)/(1-x) is in roughly in the range [1/rt2, + // rt2] + VUINT n; + VFLOAT one_plus_x, one_minus_x; + one_plus_x = __riscv_vfadd(vx, fp_posOne, vlen); + one_minus_x = __riscv_vfrsub(vx, fp_posOne, vlen); + // note one_minus_x >= 2^(-53) is never 0 + VFLOAT ratio = + __riscv_vfmul(one_plus_x, __riscv_vfrec7(one_minus_x, vlen), vlen); + n = __riscv_vadd(__riscv_vsrl(F_AS_U(ratio), MAN_LEN - 8, vlen), 0x96, + vlen); + n = __riscv_vsub(__riscv_vsrl(n, 8, vlen), EXP_BIAS, vlen); + + VINT X = __riscv_vfcvt_x(__riscv_vfmul(vx, 0x1.0p60, vlen), vlen); + VINT Numer, Denom; + // no overflow, so it does not matter if we use the saturating add or not + VINT One_plus_X = __riscv_vadd(X, ONE_Q60, vlen); + VINT One_minus_X = __riscv_vrsub(X, ONE_Q60, vlen); + One_minus_X = __riscv_vsll(One_minus_X, n, vlen); + Numer = __riscv_vsub(One_plus_X, One_minus_X, vlen); + Denom = __riscv_vadd(One_plus_X, One_minus_X, vlen); + VFLOAT numer, delta_numer, denom, delta_denom; + numer = __riscv_vfcvt_f(Numer, vlen); + VINT Tail = __riscv_vsub(Numer, __riscv_vfcvt_x(numer, vlen), vlen); + delta_numer = __riscv_vfcvt_f(Tail, vlen); + denom = __riscv_vfcvt_f(Denom, vlen); + Tail = 
__riscv_vsub(Denom, __riscv_vfcvt_x(denom, vlen), vlen); + delta_denom = __riscv_vfcvt_f(Tail, vlen); + + VFLOAT r_hi, r_lo, r; + DIV2_N2D2(numer, delta_numer, denom, delta_denom, r_hi, r_lo, vlen); + VBOOL x_in_range = __riscv_vmflt(vx, 0x1.fcp-3, vlen); + r_hi = __riscv_vmerge(r_hi, vx, x_in_range, vlen); + r_lo = __riscv_vfmerge(r_lo, fp_posZero, x_in_range, vlen); + n = __riscv_vmerge(n, 0, x_in_range, vlen); + + r = __riscv_vfadd(r_hi, r_lo, vlen); + VFLOAT rsq = __riscv_vfmul(r, r, vlen); + VFLOAT rcube = __riscv_vfmul(rsq, r, vlen); + VFLOAT r4 = __riscv_vfmul(rsq, rsq, vlen); + VFLOAT r8 = __riscv_vfmul(r4, r4, vlen); + VFLOAT poly_right = PSTEP( + 0x1.745841007bbc5p-4, rsq, + PSTEP(0x1.3b99547086d02p-4, rsq, + PSTEP(0x1.08c7af78d89d3p-4, 0x1.35f7ae00ccaf9p-4, rsq, vlen), + vlen), + vlen); + VFLOAT poly_left = PSTEP( + 0x1.555555555547dp-2, rsq, + PSTEP(0x1.99999999d0d12p-3, rsq, + PSTEP(0x1.249248fe05e8dp-3, 0x1.c71c8bc60dde3p-4, rsq, vlen), + vlen), + vlen); + VFLOAT poly = __riscv_vfmadd(poly_right, r8, poly_left, vlen); + poly = __riscv_vfmadd(poly, rcube, r_lo, vlen); + // At this point r_hi + poly approximates atanh(r) + + // Compose the final answer (n/2)*log(2) + atanh(r) + VFLOAT n_flt = __riscv_vfcvt_f(n, vlen); + VFLOAT A = __riscv_vfmul(n_flt, LOG2_BY2_HI, vlen); + VFLOAT S, s; + FAST2SUM(A, r_hi, S, s, vlen); + s = __riscv_vfmacc(s, LOG2_BY2_LO, n_flt, vlen); + s = __riscv_vfadd(s, poly, vlen); + vy = __riscv_vfadd(S, s, vlen); + + vy = __riscv_vfsgnj(vy, vx_orig, vlen); + + vy = __riscv_vmerge(vy, vy_special, special_args, vlen); + + // copy vy into y and increment addr pointers + VFSTORE_OUTARG1(vy, vlen); + + INCREMENT_INARG1(vlen); + INCREMENT_OUTARG1(vlen); + } + RESTORE_FRM; +} diff --git a/include/rvvlm_invhyperD.h b/include/rvvlm_invhyperD.h new file mode 100644 index 0000000..a71926b --- /dev/null +++ b/include/rvvlm_invhyperD.h @@ -0,0 +1,154 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#define LOG2_HI 0x1.62e42fefa4000p-1 +#define LOG2_LO -0x1.8432a1b0e2634p-43 +#define LOG2_BY2_HI 0x1.62e42fefa4000p-2 +#define LOG2_BY2_LO -0x1.8432a1b0e2634p-44 +#define ONE_Q60 0x1000000000000000 + +#if defined(COMPILE_FOR_ACOSH) +#define PLUS_MINUS_ONE -0x1.0p0 +#else +#define PLUS_MINUS_ONE 0x1.0p0 +#endif + +#define EXCEPTION_HANDLING_ACOSH(vx, special_args, vy_special, vlen) \ + do { \ + VFLOAT vxm1 = __riscv_vfsub((vx), fp_posOne, (vlen)); \ + VUINT vclass = __riscv_vfclass(vxm1, (vlen)); \ + IDENTIFY(vclass, class_negative, (special_args), (vlen)); \ + vxm1 = __riscv_vfmerge(vxm1, fp_sNaN, (special_args), (vlen)); \ + IDENTIFY(vclass, class_NaN | class_Inf | class_negative | class_Zero, \ + (special_args), (vlen)); \ + if (__riscv_vcpop((special_args), (vlen)) > 0) { \ + (vy_special) = __riscv_vfmul((special_args), vxm1, vxm1, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_posOne, (special_args), (vlen)); \ + } \ + } while (0) + +#define EXCEPTION_HANDLING_ASINH(vx, special_args, vy_special, vlen) \ + do { \ + VUINT vclass = __riscv_vfclass((vx), (vlen)); \ + VBOOL Inf_or_NaN_or_pm0; \ + IDENTIFY(vclass, class_NaN | class_Inf | class_Zero, Inf_or_NaN_or_pm0, \ + (vlen)); \ + VUINT expo_x = __riscv_vsrl(F_AS_U(vx), MAN_LEN, (vlen)); \ + expo_x = __riscv_vand(expo_x, 0x7FF, (vlen)); \ + VBOOL x_small = __riscv_vmsltu(expo_x, EXP_BIAS - 30, (vlen)); \ + (special_args) = __riscv_vmor(Inf_or_NaN_or_pm0, x_small, (vlen)); \ + if (__riscv_vcpop((special_args), (vlen)) > 0) { \ + VFLOAT tmp = __riscv_vfmadd(x_small, (vx), -0x1.0p-60, (vx), (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), tmp, x_small, (vlen)); \ + tmp = __riscv_vfadd(Inf_or_NaN_or_pm0, (vx), (vx), (vlen)); \ + (vy_special) = \ + __riscv_vmerge((vy_special), tmp, Inf_or_NaN_or_pm0, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_posZero, (special_args), (vlen)); \ + } \ + } while (0) + +#define EXCEPTION_HANDLING_ATANH(vx, special_args, vy_special, 
vlen) \ + do { \ + VUINT expo_x = __riscv_vand(__riscv_vsrl(F_AS_U(vx), MAN_LEN, (vlen)), \ + 0x7FF, (vlen)); \ + VBOOL x_large = __riscv_vmsgeu(expo_x, EXP_BIAS, (vlen)); \ + VBOOL x_small = __riscv_vmsltu(expo_x, EXP_BIAS - 30, (vlen)); \ + (special_args) = __riscv_vmor(x_large, x_small, (vlen)); \ + if (__riscv_vcpop((special_args), (vlen)) > 0) { \ + VFLOAT abs_x = __riscv_vfsgnj((vx), fp_posOne, (vlen)); \ + VBOOL x_gt_1 = __riscv_vmfgt(abs_x, fp_posOne, (vlen)); \ + VBOOL x_eq_1 = __riscv_vmfeq(abs_x, fp_posOne, (vlen)); \ + /* substitute |x| > 1 with sNaN */ \ + (vx) = __riscv_vfmerge((vx), fp_sNaN, x_gt_1, (vlen)); \ + /* substitute |x| = 1 with +/-Inf and generate div-by-zero signal */ \ + VFLOAT tmp = VFMV_VF(fp_posZero, (vlen)); \ + tmp = __riscv_vfsgnj(tmp, (vx), (vlen)); \ + tmp = __riscv_vfrec7(x_eq_1, tmp, (vlen)); \ + (vy_special) = __riscv_vfadd((special_args), (vx), (vx), (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), tmp, x_eq_1, (vlen)); \ + tmp = __riscv_vfmadd(x_small, (vx), 0x1.0p-60, (vx), (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), tmp, x_small, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_posZero, (special_args), (vlen)); \ + } \ + } while (0) + +// scale x down by 2^(-550) and set u to 0 if x >= 2^500 +#define SCALE_X(vx, n, u, vlen) \ + do { \ + VUINT expo_x = __riscv_vsrl(F_AS_U((vx)), MAN_LEN, (vlen)); \ + VBOOL x_large = __riscv_vmsgeu(expo_x, EXP_BIAS + 500, (vlen)); \ + (n) = __riscv_vxor((n), (n), (vlen)); \ + (n) = __riscv_vmerge((n), 550, x_large, (vlen)); \ + (u) = VFMV_VF(PLUS_MINUS_ONE, (vlen)); \ + (u) = __riscv_vfmerge((u), fp_posZero, x_large, (vlen)); \ + (vx) = I_AS_F( \ + __riscv_vsub(F_AS_I(vx), __riscv_vsll((n), MAN_LEN, (vlen)), (vlen))); \ + } while (0) + +// 2^(-50) <= X <= 2^500, u is -1 or 0 +// If u is -1, 1 <= X < 2^500 +#define XSQ_PLUS_U_ACOSH(vx, u, A, a, vlen) \ + do { \ + VFLOAT P, p; \ + PROD_X1Y1((vx), (vx), P, p, (vlen)); \ + VFLOAT tmp1, tmp2; \ + FAST2SUM(P, (u), tmp1, tmp2, 
(vlen)); \ + tmp2 = __riscv_vfadd(tmp2, p, (vlen)); \ + FAST2SUM(tmp1, tmp2, (A), (a), (vlen)); \ + } while (0) + +#define XSQ_PLUS_U_ASINH(vx, u, A, a, vlen) \ + do { \ + VFLOAT P, p; \ + PROD_X1Y1((vx), (vx), P, p, (vlen)); \ + VFLOAT tmp1, tmp2; \ + POS2SUM(P, (u), tmp1, tmp2, (vlen)); \ + tmp2 = __riscv_vfadd(tmp2, p, (vlen)); \ + POS2SUM(tmp1, tmp2, (A), (a), (vlen)); \ + } while (0) + +// scale x down by 2^(-550) and set u to 0 if x >= 2^500 +#define SCALE_4_LOG(S, s, n, vlen) \ + do { \ + VINT expo_x = __riscv_vsra(F_AS_I((S)), MAN_LEN - 8, (vlen)); \ + expo_x = __riscv_vadd(expo_x, 0x96, (vlen)); \ + expo_x = __riscv_vsra(expo_x, 8, (vlen)); \ + VINT n_adjust = __riscv_vsub(expo_x, EXP_BIAS, (vlen)); \ + (n) = __riscv_vadd((n), n_adjust, (vlen)); \ + expo_x = __riscv_vsll(__riscv_vrsub(expo_x, 2 * EXP_BIAS, (vlen)), \ + MAN_LEN, (vlen)); \ + (S) = I_AS_F(__riscv_vsub( \ + F_AS_I((S)), __riscv_vsll(n_adjust, MAN_LEN, (vlen)), (vlen))); \ + (s) = __riscv_vfmul((s), I_AS_F(expo_x), (vlen)); \ + } while (0) + +#define TRANSFORM_2_ATANH(S, s, numer, delta_numer, denom, delta_denom, vlen) \ + do { \ + VFLOAT S_tmp = __riscv_vfsub((S), fp_posOne, (vlen)); \ + FAST2SUM(S_tmp, (s), (numer), (delta_numer), (vlen)); \ + (numer) = __riscv_vfadd((numer), (numer), (vlen)); \ + (delta_numer) = __riscv_vfadd((delta_numer), (delta_numer), (vlen)); \ + S_tmp = VFMV_VF(fp_posOne, (vlen)); \ + FAST2SUM(S_tmp, (S), (denom), (delta_denom), (vlen)); \ + (delta_denom) = __riscv_vfadd((delta_denom), (s), (vlen)); \ + } while (0) + +#define LOG_POLY(r, r_lo, poly, vlen) \ + do { \ + VFLOAT rsq = __riscv_vfmul((r), (r), (vlen)); \ + VFLOAT rcube = __riscv_vfmul(rsq, (r), (vlen)); \ + VFLOAT r6 = __riscv_vfmul(rcube, rcube, (vlen)); \ + VFLOAT poly_right = PSTEP(0x1.c71c543983a27p-12, rsq, \ + PSTEP(0x1.7465c27ee47d0p-14, rsq, \ + PSTEP(0x1.39af2e90a6554p-16, \ + 0x1.2e74f2255e096p-18, rsq, (vlen)), \ + (vlen)), \ + (vlen)); \ + VFLOAT poly_left = \ + PSTEP(0x1.555555555558cp-4, rsq, 
\ + PSTEP(0x1.9999999982550p-7, 0x1.2492493f7cc71p-9, rsq, (vlen)), \ + (vlen)); \ + (poly) = __riscv_vfmadd(poly_right, r6, poly_left, (vlen)); \ + (poly) = __riscv_vfmadd(poly, rcube, (r_lo), (vlen)); \ + } while (0) diff --git a/src/rvvlm_acoshD.c b/src/rvvlm_acoshD.c new file mode 100644 index 0000000..0a1f4a4 --- /dev/null +++ b/src/rvvlm_acoshD.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE UNIT_STRIDE + +#include RVVLM_ACOSHD_VSET_CONFIG + +#define COMPILE_FOR_ACOSH + +#include "rvvlm_asinhcoshD.inc.h" diff --git a/src/rvvlm_acoshDI.c b/src/rvvlm_acoshDI.c new file mode 100644 index 0000000..7685f54 --- /dev/null +++ b/src/rvvlm_acoshDI.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE GENERAL_STRIDE + +#include RVVLM_ACOSHDI_VSET_CONFIG + +#define COMPILE_FOR_ACOSH + +#include "rvvlm_asinhcoshD.inc.h" diff --git a/src/rvvlm_asinhD.c b/src/rvvlm_asinhD.c new file mode 100644 index 0000000..8665eb2 --- /dev/null +++ b/src/rvvlm_asinhD.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE UNIT_STRIDE + +#include RVVLM_ASINHD_VSET_CONFIG + +#define COMPILE_FOR_ASINH + +#include "rvvlm_asinhcoshD.inc.h" diff --git a/src/rvvlm_asinhDI.c b/src/rvvlm_asinhDI.c new file mode 100644 index 0000000..bcc76b0 --- /dev/null +++ b/src/rvvlm_asinhDI.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. 
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdint.h>
+
+#include "rvvlm.h"
+#define API_SIGNATURE API_SIGNATURE_11
+#define STRIDE GENERAL_STRIDE
+
+#include RVVLM_ASINHDI_VSET_CONFIG
+
+#define COMPILE_FOR_ASINH
+
+#include "rvvlm_asinhcoshD.inc.h"
diff --git a/src/rvvlm_atanhD.c b/src/rvvlm_atanhD.c
new file mode 100644
index 0000000..837eea9
--- /dev/null
+++ b/src/rvvlm_atanhD.c
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdint.h>
+
+#include "rvvlm.h"
+#define API_SIGNATURE API_SIGNATURE_11
+#define STRIDE UNIT_STRIDE
+
+#include RVVLM_ATANHD_VSET_CONFIG
+
+#include "rvvlm_atanhD.inc.h"
diff --git a/src/rvvlm_atanhDI.c b/src/rvvlm_atanhDI.c
new file mode 100644
index 0000000..08789c3
--- /dev/null
+++ b/src/rvvlm_atanhDI.c
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdint.h>
+
+#include "rvvlm.h"
+#define API_SIGNATURE API_SIGNATURE_11
+#define STRIDE GENERAL_STRIDE
+
+#include RVVLM_ATANHDI_VSET_CONFIG
+
+#include "rvvlm_atanhD.inc.h"
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7163286..4d6964b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -70,14 +70,20 @@ set(TEST_SOURCES
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atan2I.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atan2pi.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atan2piI.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_acosh.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_acoshI.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_asinh.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_asinhI.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atanh.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atanhI.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expI.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp2.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp2I.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10I.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1I.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10I.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1I.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_log.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_logI.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_log2.cpp
diff --git a/test/src/test_acosh.cpp b/test/src/test_acosh.cpp
new file mode 100644
index 0000000..6f833d4
--- /dev/null
+++ b/test/src/test_acosh.cpp
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("acosh: current chosen algorithm; reduced argument in FP64 only")
+
+  show_special_fp64(rvvlm_acosh, "Special Value handling of this function");
+
+  x_start = 0x1.0p0 + 0x1.0p-52;
+  x_end = 0x1.0p0 + 0x1.0p-10;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p0;
+  x_end = 0x1.0p2;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p490;
+  x_end = 0x1.0p520;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p1020;
+  x_end = 0x1.FFFFFFFFFFp1023;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  return 0;
+}
diff --git a/test/src/test_acoshI.cpp b/test/src/test_acoshI.cpp
new file mode 100644
index 0000000..755e405
--- /dev/null
+++ b/test/src/test_acoshI.cpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("acoshI: current chosen algorithm; reduced argument in FP64 only")
+
+  x_start = 1.2;
+  x_end = 10.0;
+  nb_tests = 30;
+  int stride_x = 21;
+  int stride_y = 39;
+  report_err_fp64(rvvlm_acoshI, acoshl, x_start, x_end, nb_tests, stride_x,
+                  stride_y);
+
+  return 0;
+}
diff --git a/test/src/test_asinh.cpp b/test/src/test_asinh.cpp
new file mode 100644
index 0000000..be6c6c5
--- /dev/null
+++ b/test/src/test_asinh.cpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("asinh: current chosen algorithm; reduced argument in FP64 only")
+
+  show_special_fp64(rvvlm_asinh, "Special Value handling of this function");
+
+  x_start = 0x1.0p-40;
+  x_end = 0x1.0p-35;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p-35;
+  x_end = -0x1.0p-40;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-20;
+  x_end = 0x1.0p-10;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-6;
+  x_end = 0x1.0p0;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p0;
+  x_end = 0x1.0p2;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p0;
+  x_end = -0x1.0p2;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p490;
+  x_end = 0x1.0p520;
+  nb_tests = 4000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p1020;
+  x_end = 0x1.FFFFFFFFFFp1023;
+  nb_tests = 4000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  return 0;
+}
diff --git a/test/src/test_asinhI.cpp b/test/src/test_asinhI.cpp
new file mode 100644
index 0000000..fd8b2e2
--- /dev/null
+++ b/test/src/test_asinhI.cpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("asinhI: current chosen algorithm; reduced argument in FP64 only")
+
+  x_start = 1.2;
+  x_end = 10.0;
+  nb_tests = 30;
+  int stride_x = 21;
+  int stride_y = 39;
+  report_err_fp64(rvvlm_asinhI, asinhl, x_start, x_end, nb_tests, stride_x,
+                  stride_y);
+
+  return 0;
+}
diff --git a/test/src/test_atanh.cpp b/test/src/test_atanh.cpp
new file mode 100644
index 0000000..09ddbc7
--- /dev/null
+++ b/test/src/test_atanh.cpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("atanh: current chosen algorithm; reduced argument in FP64 only")
+
+  // show_special_fp64(rvvlm_atanh, "Special Value handling of this function");
+
+  x_start = 0x1.0p-40;
+  x_end = 0x1.0p-35;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p-35;
+  x_end = -0x1.0p-40;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-20;
+  x_end = 0x1.0p-10;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-10;
+  x_end = 0x1.ffp-3;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.ffp-3;
+  x_end = 0x1.0p-2;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-1;
+  x_end = 0x1.0p0 - 0x1.0p-51;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p-1;
+  x_end = -(0x1.0p0 - 0x1.0p-51);
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = -1.0 + 0x1.0p-53;
+  x_end = 1.0 - 0x1.0p-51;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  return 0;
+}
diff --git a/test/src/test_atanhI.cpp b/test/src/test_atanhI.cpp
new file mode 100644
index 0000000..8118dc5
--- /dev/null
+++ b/test/src/test_atanhI.cpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("atanhI: current chosen algorithm; reduced argument in FP64 only")
+
+  x_start = 0.1;
+  x_end = 0.9;
+  nb_tests = 30;
+  int stride_x = 21;
+  int stride_y = 39;
+  report_err_fp64(rvvlm_atanhI, atanhl, x_start, x_end, nb_tests, stride_x,
+                  stride_y);
+
+  return 0;
+}