From 47922417a7b9a7e3cd40625871810d1f1983aedd Mon Sep 17 00:00:00 2001 From: PingTakPeterTang Date: Thu, 4 Jan 2024 19:56:49 +0800 Subject: [PATCH] added inverse hyperbolic functions acosh/asinh/atanh --- CMakeLists.txt | 6 ++ include/rvvlm.h | 69 +++++++++++++-- include/rvvlm_acoshD.inc.h | 120 +++++++++++++++++++++++++ include/rvvlm_asinhcoshD.inc.h | 147 +++++++++++++++++++++++++++++++ include/rvvlm_atanhD.inc.h | 139 +++++++++++++++++++++++++++++ include/rvvlm_invhyperD.h | 154 +++++++++++++++++++++++++++++++++ src/rvvlm_acoshD.c | 16 ++++ src/rvvlm_acoshDI.c | 16 ++++ src/rvvlm_asinhD.c | 16 ++++ src/rvvlm_asinhDI.c | 16 ++++ src/rvvlm_atanhD.c | 14 +++ src/rvvlm_atanhDI.c | 14 +++ test/CMakeLists.txt | 14 ++- test/src/test_acosh.cpp | 43 +++++++++ test/src/test_acoshI.cpp | 29 +++++++ test/src/test_asinh.cpp | 63 ++++++++++++++ test/src/test_asinhI.cpp | 29 +++++++ test/src/test_atanh.cpp | 63 ++++++++++++++ test/src/test_atanhI.cpp | 29 +++++++ 19 files changed, 988 insertions(+), 9 deletions(-) create mode 100644 include/rvvlm_acoshD.inc.h create mode 100644 include/rvvlm_asinhcoshD.inc.h create mode 100644 include/rvvlm_atanhD.inc.h create mode 100644 include/rvvlm_invhyperD.h create mode 100644 src/rvvlm_acoshD.c create mode 100644 src/rvvlm_acoshDI.c create mode 100644 src/rvvlm_asinhD.c create mode 100644 src/rvvlm_asinhDI.c create mode 100644 src/rvvlm_atanhD.c create mode 100644 src/rvvlm_atanhDI.c create mode 100644 test/src/test_acosh.cpp create mode 100644 test/src/test_acoshI.cpp create mode 100644 test/src/test_asinh.cpp create mode 100644 test/src/test_asinhI.cpp create mode 100644 test/src/test_atanh.cpp create mode 100644 test/src/test_atanhI.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 531ff3c..de929db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,12 @@ set(PROJECT_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atan2DI.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atan2piD.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atan2piDI.c 
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_acoshD.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_acoshDI.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_asinhD.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_asinhDI.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atanhD.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_atanhDI.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_expD_tbl.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_expD.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rvvlm_expDI.c diff --git a/include/rvvlm.h b/include/rvvlm.h index 3fb8986..336e837 100644 --- a/include/rvvlm.h +++ b/include/rvvlm.h @@ -67,9 +67,9 @@ union sui64_fp64 { #define FAST2SUM(X, Y, S, s, vlen) \ do { \ - S = __riscv_vfadd((X), (Y), (vlen)); \ - s = __riscv_vfsub((X), (S), (vlen)); \ - s = __riscv_vfadd((s), (Y), (vlen)); \ + (S) = __riscv_vfadd((X), (Y), (vlen)); \ + (s) = __riscv_vfsub((X), (S), (vlen)); \ + (s) = __riscv_vfadd((s), (Y), (vlen)); \ } while (0) #define POS2SUM(X, Y, S, s, vlen) \ @@ -92,8 +92,8 @@ union sui64_fp64 { #define PROD_X1Y1(x, y, prod_hi, prod_lo, vlen) \ do { \ - prod_hi = __riscv_vfmul((x), (y), (vlen)); \ - prod_lo = __riscv_vfmsub((x), (y), (prod_hi), (vlen)); \ + (prod_hi) = __riscv_vfmul((x), (y), (vlen)); \ + (prod_lo) = __riscv_vfmsub((x), (y), (prod_hi), (vlen)); \ } while (0) #define DIV_N1D2(numer, denom, delta_d, Q, q, vlen) \ @@ -115,6 +115,32 @@ union sui64_fp64 { (Q) = __riscv_vfadd((Q), _q, (vlen)); \ } while (0) +#define DIV2_N2D2(numer, delta_n, denom, delta_d, Q, delta_Q, vlen) \ + do { \ + VFLOAT _q; \ + (Q) = __riscv_vfdiv((numer), (denom), (vlen)); \ + _q = __riscv_vfnmsub((Q), (denom), (numer), (vlen)); \ + _q = __riscv_vfnmsac(_q, (Q), (delta_d), (vlen)); \ + _q = __riscv_vfadd(_q, (delta_n), (vlen)); \ + (delta_Q) = __riscv_vfmul(_q, __riscv_vfrec7((denom), (vlen)), (vlen)); \ + } while (0) + +#define SQRT2_X2(x, delta_x, r, delta_r, vlen) \ + do { \ + VFLOAT xx = __riscv_vfadd((x), (delta_x), (vlen)); \ + VBOOL x_eq_0 = __riscv_vmfeq(xx, fp_posZero, (vlen)); \ + xx = 
__riscv_vfmerge(xx, fp_posOne, x_eq_0, (vlen)); \ + (r) = __riscv_vfsqrt(xx, (vlen)); \ + (delta_r) = __riscv_vfnmsub((r), (r), (x), (vlen)); \ + (delta_r) = __riscv_vfadd((delta_r), (delta_x), (vlen)); \ + (delta_r) = __riscv_vfmul((delta_r), __riscv_vfrec7(xx, (vlen)), (vlen)); \ + /* (delta_r) = __riscv_vfdiv((delta_r), xx, (vlen)); */ \ + (delta_r) = __riscv_vfmul((delta_r), 0x1.0p-1, (vlen)); \ + (delta_r) = __riscv_vfmul((delta_r), (r), (vlen)); \ + (r) = __riscv_vfmerge((r), fp_posZero, x_eq_0, (vlen)); \ + (delta_r) = __riscv_vfmerge((delta_r), fp_posZero, x_eq_0, (vlen)); \ + } while (0) + #define IDENTIFY(vclass, stencil, identity_mask, vlen) \ identity_mask = \ __riscv_vmsgtu(__riscv_vand((vclass), (stencil), (vlen)), 0, (vlen)); @@ -189,6 +215,27 @@ union sui64_fp64 { #define RVVLM_ATAN2PIDI_VSET_CONFIG "rvvlm_fp64m2.h" #define RVVLM_ATAN2PIDI_FIXEDPT rvvlm_atan2piI +// FP64 acosh function configuration +#define RVVLM_ACOSHD_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ACOSHD_STD rvvlm_acosh + +#define RVVLM_ACOSHDI_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ACOSHDI_STD rvvlm_acoshI + +// FP64 asinh function configuration +#define RVVLM_ASINHD_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ASINHD_STD rvvlm_asinh + +#define RVVLM_ASINHDI_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ASINHDI_STD rvvlm_asinhI + +// FP64 atanh function configuration +#define RVVLM_ATANHD_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ATANHD_MIXED rvvlm_atanh + +#define RVVLM_ATANHDI_VSET_CONFIG "rvvlm_fp64m2.h" +#define RVVLM_ATANHDI_MIXED rvvlm_atanhI + // FP64 exp function configuration #define RVVLM_EXPD_VSET_CONFIG "rvvlm_fp64m4.h" #define RVVLM_EXPD_STD rvvlm_expD_std @@ -381,6 +428,18 @@ void RVVLM_ATAN2PIDI_FIXEDPT(size_t xy_len, const double *y, size_t stride_y, const double *x, size_t stride_x, double *z, size_t stride_z); +void RVVLM_ACOSHD_STD(size_t x_len, const double *x, double *y); +void RVVLM_ACOSHDI_STD(size_t x_len, const double *x, size_t stride_x, + double *y, 
size_t stride_y); + +void RVVLM_ASINHD_STD(size_t x_len, const double *x, double *y); +void RVVLM_ASINHDI_STD(size_t x_len, const double *x, size_t stride_x, + double *y, size_t stride_y); + +void RVVLM_ATANHD_MIXED(size_t x_len, const double *x, double *y); +void RVVLM_ATANHDI_MIXED(size_t x_len, const double *x, size_t stride_x, + double *y, size_t stride_y); + void RVVLM_EXPD_STD(size_t x_len, const double *x, double *y); void RVVLM_EXPDI_STD(size_t x_len, const double *x, size_t stride_x, double *y, size_t stride_y); diff --git a/include/rvvlm_acoshD.inc.h b/include/rvvlm_acoshD.inc.h new file mode 100644 index 0000000..28f8a65 --- /dev/null +++ b/include/rvvlm_acoshD.inc.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "rvvlm_invhyperD.h" + +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ACOSHD_STD +#else +#define F_VER1 RVVLM_ACOSHDI_STD +#endif + +#include + +// Acosh(x) is defined for x >= 1 by the formula log(x + sqrt(x*x - 1)) +// and for the log function log(2^n z), we uses the expansion in terms of atanh +// n log(2) + 2 atanh((z-1)/(z+1)) +// This algorithm obtains this scale factor 2^n from the input x, and computes +// the expression x' + sqrt(x'*x' - 2^(-2n)) thus avoiding possible overflow or +// excess computation such as computing sqrt(x*x - 1) by x * sqrt(1 - 1/(x*x)) +// which needs a division on top of a sqrt. 
+void F_VER1(API) { + size_t vlen; + VFLOAT vx, vx_orig, vy, vy_special; + VBOOL special_args; + + SET_ROUNDTONEAREST; + // stripmining over input arguments + for (; _inarg_n > 0; _inarg_n -= vlen) { + vlen = VSET(_inarg_n); + vx = VFLOAD_INARG1(vlen); +#if defined(COMPILE_FOR_ASINH) + vx_orig = vx; +#endif + +#if defined(COMPILE_FOR_ACOSH) + // Handle Inf and NaN and input <= 1.0 + EXCEPTION_HANDLING_ACOSH(vx, special_args, vy_special, vlen); +#else + // Handle Inf and NaN and |input} < 2^(-30) + EXCEPTION_HANDLING_ASINH(vx, special_args, vy_special, vlen); + vx = __riscv_vfsgnj(vx, fp_posOne, vlen); +#endif + + // Need to scale x so that x + sqrt(x*x +/- 1) doesn't overflow + // Since x >= 1, we scale x down by 2^(-550) if x >= 2^500 and set 1 to 0 + VINT n; + VFLOAT u; + SCALE_X(vx, n, u, vlen); + // n is 0 or 500; and u is +/-1.0 or 0.0 + + // sqrt(x*x + u) extra precisely + VFLOAT A, a; +#if defined(COMPILE_FOR_ACOSH) + XSQ_PLUS_U_ACOSH(vx, u, A, a, vlen); +#else + XSQ_PLUS_U_ASINH(vx, u, A, a, vlen); +#endif + // A + a is x*x + u + + VFLOAT B, b; + SQRT2_X2(A, a, B, b, vlen); + // B + b is sqrt(x*x + u) to about 7 extra bits + + // x dominants B for acosh + VFLOAT S, s; +#if defined(COMPILE_FOR_ACOSH) + FAST2SUM(vx, B, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#else + FAST2SUM(B, vx, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#endif + + // x + sqrt(x*x + u) is accurately represented as S + s + // We first scale S, s so that it falls roughly in [1/rt2, rt2] + SCALE_4_LOG(S, s, n, vlen); + + // log(x + sqrt(x*x + u)) = n * log(2) + log(y); y = S + s + // since log(y) = 2 atanh( (y-1)/(y+1) ) to be approximated + // by t + t^3 * poly(t^2), t = 2 (y-1)/(y+1) + // We now compute the numerator and denominator and its quotient + // to extra precision + VFLOAT numer, delta_numer, denom, delta_denom; + TRANSFORM_2_ATANH(S, s, numer, delta_numer, denom, delta_denom, vlen); + + VFLOAT r_hi, r_lo, r; + DIV2_N2D2(numer, delta_numer, denom, delta_denom, r_hi, 
r_lo, vlen); + r = __riscv_vfadd(r_hi, r_lo, vlen); + + VFLOAT n_flt = __riscv_vfcvt_f(n, vlen); + + VFLOAT poly; + LOG_POLY(r, r_lo, poly, vlen); + // At this point r_hi + poly approximates log(X) + + // Reconstruction: logB(in_arg) = n logB(2) + log(X) * logB(e), computed as + // n*(logB_2_hi + logB_2_lo) + r * (logB_e_hi + logB_e_lo) + poly * + // logB_e_hi It is best to compute n * logB_2_hi + r * logB_e_hi in extra + // precision + + A = __riscv_vfmul(n_flt, LOG2_HI, vlen); + FAST2SUM(A, r_hi, S, s, vlen); + s = __riscv_vfmacc(s, LOG2_LO, n_flt, vlen); + s = __riscv_vfadd(s, poly, vlen); + + vy = __riscv_vfadd(S, s, vlen); +#if defined(COMPILE_FOR_ASINH) + vy = __riscv_vfsgnj(vy, vx_orig, vlen); +#endif + vy = __riscv_vmerge(vy, vy_special, special_args, vlen); + + // copy vy into y and increment addr pointers + VFSTORE_OUTARG1(vy, vlen); + + INCREMENT_INARG1(vlen); + INCREMENT_OUTARG1(vlen); + } + RESTORE_FRM; +} diff --git a/include/rvvlm_asinhcoshD.inc.h b/include/rvvlm_asinhcoshD.inc.h new file mode 100644 index 0000000..a57e971 --- /dev/null +++ b/include/rvvlm_asinhcoshD.inc.h @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "rvvlm_invhyperD.h" + +#if defined(COMPILE_FOR_ACOSH) +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ACOSHD_STD +#else +#define F_VER1 RVVLM_ACOSHDI_STD +#endif +#else +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ASINHD_STD +#else +#define F_VER1 RVVLM_ASINHDI_STD +#endif +#endif + +#include + +// Acosh(x) is defined for x >= 1 by the formula log(x + sqrt(x*x - 1)) +// Asinh(x) is defined for all finite x by the formula log(x + sqrt(x*x + 1)) +// Acosh is always positive, and Asinh(-x) = -Asinh(x). Thus we in general +// work with |x| and restore the sign (if necessary) in the end. 
+// For the log function log(2^n z), we uses the expansion in terms of atanh: +// n log(2) + 2 atanh((z-1)/(z+1)) +// The algorithm here first scales down x by 2^(-550) when |x| >= 2^500. +// And for such large x, both acosh and asinh equals log(2x) to very high +// precision. We safely ignore the +/- 1 when this is the case. +// +// A power 2^n is determined by the value of x + sqrt(x*x +/- 1) so that +// scaling the expression by 2^(-n) transforms it to the range [0.71, 1.42]. +// Log(t) for t in this region is computed by 2 atanh((t-1)/(t+1)) +// More precisely, we use s = 2(t-1)/(t+1) and approximate the function +// 2 atanh(s/2) by s + s^3 * polynomial(s^2). +// The final result is n * log(2) + s + s^3 * polynomial(s^2) +// which is computed with care. +void F_VER1(API) { + size_t vlen; + VFLOAT vx, vx_orig, vy, vy_special; + VBOOL special_args; + + SET_ROUNDTONEAREST; + // stripmining over input arguments + for (; _inarg_n > 0; _inarg_n -= vlen) { + vlen = VSET(_inarg_n); + vx = VFLOAD_INARG1(vlen); +#if defined(COMPILE_FOR_ASINH) + vx_orig = vx; +#endif + +#if defined(COMPILE_FOR_ACOSH) + // Handle Inf and NaN and input <= 1.0 + EXCEPTION_HANDLING_ACOSH(vx, special_args, vy_special, vlen); +#else + // Handle Inf and NaN and |input| < 2^(-30) + EXCEPTION_HANDLING_ASINH(vx, special_args, vy_special, vlen); + vx = __riscv_vfsgnj(vx, fp_posOne, vlen); +#endif + + // Need to scale x so that x + sqrt(x*x +/- 1) doesn't overflow + // We scale x down by 2^(-550) if x >= 2^500 and set the "+/- 1" to 0 + VINT n; + VFLOAT u; + SCALE_X(vx, n, u, vlen); + // n is 0 or 500; and u is +/-1.0 or 0.0 + + // sqrt(x*x + u) extra precisely + VFLOAT A, a; +#if defined(COMPILE_FOR_ACOSH) + XSQ_PLUS_U_ACOSH(vx, u, A, a, vlen); +#else + XSQ_PLUS_U_ASINH(vx, u, A, a, vlen); +#endif + // A + a is x*x + u + + VFLOAT B, b; +#if defined(COMPILE_FOR_ACOSH) + SQRT2_X2(A, a, B, b, vlen); + // B + b is sqrt(x*x + u) to about 7 extra bits +#else + // For asinh, we need the sqrt to 
double-double precision + VFLOAT recip = __riscv_vfrdiv(A, fp_posOne, vlen); + B = __riscv_vfsqrt(A, vlen); + b = __riscv_vfnmsub(B, B, A, vlen); + b = __riscv_vfadd(b, a, vlen); + VFLOAT B_recip = __riscv_vfmul(B, recip, vlen); + b = __riscv_vfmul(b, 0x1.0p-1, vlen); + b = __riscv_vfmul(b, B_recip, vlen); +#endif + + VFLOAT S, s; +#if defined(COMPILE_FOR_ACOSH) + // x dominantes B for acosh + FAST2SUM(vx, B, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#else + // B dominates x for asinh + FAST2SUM(B, vx, S, s, vlen); + s = __riscv_vfadd(s, b, vlen); +#endif + + // x + sqrt(x*x + u) is accurately represented as S + s + // We first scale S, s by 2^(-n) so that the scaled value + // falls roughly in [1/rt2, rt2] + SCALE_4_LOG(S, s, n, vlen); + + // log(x + sqrt(x*x + u)) = n * log(2) + log(y); y = S + s + // We use log(y) = 2 atanh( (y-1)/(y+1) ) and approximate the latter + // by t + t^3 * poly(t^2), t = 2 (y-1)/(y+1) + + // We now compute the numerator 2(y-1) and denominator y+1 and their + // quotient to extra precision + VFLOAT numer, delta_numer, denom, delta_denom; + TRANSFORM_2_ATANH(S, s, numer, delta_numer, denom, delta_denom, vlen); + + VFLOAT r_hi, r_lo, r; + DIV2_N2D2(numer, delta_numer, denom, delta_denom, r_hi, r_lo, vlen); + r = __riscv_vfadd(r_hi, r_lo, vlen); + + VFLOAT poly; + LOG_POLY(r, r_lo, poly, vlen); + // At this point r_hi + poly approximates log(X) + + // Compose the final result: n * log(2) + r_hi + poly + VFLOAT n_flt = __riscv_vfcvt_f(n, vlen); + A = __riscv_vfmul(n_flt, LOG2_HI, vlen); + FAST2SUM(A, r_hi, S, s, vlen); + s = __riscv_vfmacc(s, LOG2_LO, n_flt, vlen); + s = __riscv_vfadd(s, poly, vlen); + + vy = __riscv_vfadd(S, s, vlen); +#if defined(COMPILE_FOR_ASINH) + vy = __riscv_vfsgnj(vy, vx_orig, vlen); +#endif + vy = __riscv_vmerge(vy, vy_special, special_args, vlen); + + // copy vy into y and increment addr pointers + VFSTORE_OUTARG1(vy, vlen); + + INCREMENT_INARG1(vlen); + INCREMENT_OUTARG1(vlen); + } + RESTORE_FRM; +} diff 
--git a/include/rvvlm_atanhD.inc.h b/include/rvvlm_atanhD.inc.h new file mode 100644 index 0000000..10aeab0 --- /dev/null +++ b/include/rvvlm_atanhD.inc.h @@ -0,0 +1,139 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "rvvlm_invhyperD.h" + +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_ATANHD_MIXED +#else +#define F_VER1 RVVLM_ATANHDI_MIXED +#endif + +#include + +// Atanh(x) is defined only for |x| <= 1. As atanh(-x) = -atanh(x), the +// main computation works with |x|. +// For |x| > 1 and x being a sNaN, the invalid signal has to be generated +// together with a returned valued of NaN. For a qNaN input, no signal is +// generated. And atan(+/- 1) yiedls +/- Inf, but a div-by-zero signal has +// to be generated. +// +// For 0 < x < 1, we use the formula atan(x) = (1/2) log( (1+x)/(1-x) ). +// The usual technique is to find a scale s = 2^(-n) so that +// r = s * (1+x)/(1-x) falls roughly in the region [1/sqrt(2), sqrt(2)]. +// Thus the desired result is (1/2)(n * log(2) + log(r)). +// Somewhat ironically, log(r) is usually approximated in terms of atanh, +// as its Taylor series around 0 converges much faster than that of log(r) +// around 1. log(r) = 2 atanh( (r-1)/(r+1) ). +// Hence, atan(x) = (n/2)log(2) + atan([(1+x)-(1-x)/s]/[(1+x)+(1-x)/s]). +// +// This implementation obtains s=2^(-n) using the approximate reciprocal +// instruction rather than computing (1+x)/(1-x) to extra precision. +// It then combines the two transformations into +// atanh( [(1+x) - (1-x)/s] / [(1+x) + (1-x)/s] ) requiring only +// one division, instead of two. +// We further observe that instead of using multiple extra-precise +// simulations to obtain both the numerator and denominator accurately, +// we can use fixed-point computations. 
+// As long as the original input |x| >= 0.248, a scale of 60 allows +// both numerator and denominator to maintain high precision without overflow, +// elminating many double-double like simulations. For |x| < 0.248, the +// core polynomial evaluated at x yields the result. +// +void F_VER1(API) { + size_t vlen; + VFLOAT vx, vx_orig, vy, vy_special; + VBOOL special_args; + + SET_ROUNDTONEAREST; + // stripmining over input arguments + for (; _inarg_n > 0; _inarg_n -= vlen) { + vlen = VSET(_inarg_n); + vx = VFLOAD_INARG1(vlen); + vx_orig = vx; + + // Handle Inf, NaN, |input| >= 1, and |input| < 2^(-30) + EXCEPTION_HANDLING_ATANH(vx, special_args, vy_special, vlen); + vx = __riscv_vfsgnj(vx, fp_posOne, vlen); + + // At this point vx are positive number, either 0, or 2^(-30) <= x < 1. + + // Get n so that 2^(-n) * (1+x)/(1-x) is in roughly in the range [1/rt2, + // rt2] + VUINT n; + VFLOAT one_plus_x, one_minus_x; + one_plus_x = __riscv_vfadd(vx, fp_posOne, vlen); + one_minus_x = __riscv_vfrsub(vx, fp_posOne, vlen); + // note one_minus_x >= 2^(-53) is never 0 + VFLOAT ratio = + __riscv_vfmul(one_plus_x, __riscv_vfrec7(one_minus_x, vlen), vlen); + n = __riscv_vadd(__riscv_vsrl(F_AS_U(ratio), MAN_LEN - 8, vlen), 0x96, + vlen); + n = __riscv_vsub(__riscv_vsrl(n, 8, vlen), EXP_BIAS, vlen); + + VINT X = __riscv_vfcvt_x(__riscv_vfmul(vx, 0x1.0p60, vlen), vlen); + VINT Numer, Denom; + // no overflow, so it does not matter if we use the saturating add or not + VINT One_plus_X = __riscv_vadd(X, ONE_Q60, vlen); + VINT One_minus_X = __riscv_vrsub(X, ONE_Q60, vlen); + One_minus_X = __riscv_vsll(One_minus_X, n, vlen); + Numer = __riscv_vsub(One_plus_X, One_minus_X, vlen); + Denom = __riscv_vadd(One_plus_X, One_minus_X, vlen); + VFLOAT numer, delta_numer, denom, delta_denom; + numer = __riscv_vfcvt_f(Numer, vlen); + VINT Tail = __riscv_vsub(Numer, __riscv_vfcvt_x(numer, vlen), vlen); + delta_numer = __riscv_vfcvt_f(Tail, vlen); + denom = __riscv_vfcvt_f(Denom, vlen); + Tail = 
__riscv_vsub(Denom, __riscv_vfcvt_x(denom, vlen), vlen); + delta_denom = __riscv_vfcvt_f(Tail, vlen); + + VFLOAT r_hi, r_lo, r; + DIV2_N2D2(numer, delta_numer, denom, delta_denom, r_hi, r_lo, vlen); + VBOOL x_in_range = __riscv_vmflt(vx, 0x1.fcp-3, vlen); + r_hi = __riscv_vmerge(r_hi, vx, x_in_range, vlen); + r_lo = __riscv_vfmerge(r_lo, fp_posZero, x_in_range, vlen); + n = __riscv_vmerge(n, 0, x_in_range, vlen); + + r = __riscv_vfadd(r_hi, r_lo, vlen); + VFLOAT rsq = __riscv_vfmul(r, r, vlen); + VFLOAT rcube = __riscv_vfmul(rsq, r, vlen); + VFLOAT r4 = __riscv_vfmul(rsq, rsq, vlen); + VFLOAT r8 = __riscv_vfmul(r4, r4, vlen); + VFLOAT poly_right = PSTEP( + 0x1.745841007bbc5p-4, rsq, + PSTEP(0x1.3b99547086d02p-4, rsq, + PSTEP(0x1.08c7af78d89d3p-4, 0x1.35f7ae00ccaf9p-4, rsq, vlen), + vlen), + vlen); + VFLOAT poly_left = PSTEP( + 0x1.555555555547dp-2, rsq, + PSTEP(0x1.99999999d0d12p-3, rsq, + PSTEP(0x1.249248fe05e8dp-3, 0x1.c71c8bc60dde3p-4, rsq, vlen), + vlen), + vlen); + VFLOAT poly = __riscv_vfmadd(poly_right, r8, poly_left, vlen); + poly = __riscv_vfmadd(poly, rcube, r_lo, vlen); + // At this point r_hi + poly approximates atanh(r) + + // Compose the final answer (n/2)*log(2) + atanh(r) + VFLOAT n_flt = __riscv_vfcvt_f(n, vlen); + VFLOAT A = __riscv_vfmul(n_flt, LOG2_BY2_HI, vlen); + VFLOAT S, s; + FAST2SUM(A, r_hi, S, s, vlen); + s = __riscv_vfmacc(s, LOG2_BY2_LO, n_flt, vlen); + s = __riscv_vfadd(s, poly, vlen); + vy = __riscv_vfadd(S, s, vlen); + + vy = __riscv_vfsgnj(vy, vx_orig, vlen); + + vy = __riscv_vmerge(vy, vy_special, special_args, vlen); + + // copy vy into y and increment addr pointers + VFSTORE_OUTARG1(vy, vlen); + + INCREMENT_INARG1(vlen); + INCREMENT_OUTARG1(vlen); + } + RESTORE_FRM; +} diff --git a/include/rvvlm_invhyperD.h b/include/rvvlm_invhyperD.h new file mode 100644 index 0000000..a71926b --- /dev/null +++ b/include/rvvlm_invhyperD.h @@ -0,0 +1,154 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#define LOG2_HI 0x1.62e42fefa4000p-1 +#define LOG2_LO -0x1.8432a1b0e2634p-43 +#define LOG2_BY2_HI 0x1.62e42fefa4000p-2 +#define LOG2_BY2_LO -0x1.8432a1b0e2634p-44 +#define ONE_Q60 0x1000000000000000 + +#if defined(COMPILE_FOR_ACOSH) +#define PLUS_MINUS_ONE -0x1.0p0 +#else +#define PLUS_MINUS_ONE 0x1.0p0 +#endif + +#define EXCEPTION_HANDLING_ACOSH(vx, special_args, vy_special, vlen) \ + do { \ + VFLOAT vxm1 = __riscv_vfsub((vx), fp_posOne, (vlen)); \ + VUINT vclass = __riscv_vfclass(vxm1, (vlen)); \ + IDENTIFY(vclass, class_negative, (special_args), (vlen)); \ + vxm1 = __riscv_vfmerge(vxm1, fp_sNaN, (special_args), (vlen)); \ + IDENTIFY(vclass, class_NaN | class_Inf | class_negative | class_Zero, \ + (special_args), (vlen)); \ + if (__riscv_vcpop((special_args), (vlen)) > 0) { \ + (vy_special) = __riscv_vfmul((special_args), vxm1, vxm1, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_posOne, (special_args), (vlen)); \ + } \ + } while (0) + +#define EXCEPTION_HANDLING_ASINH(vx, special_args, vy_special, vlen) \ + do { \ + VUINT vclass = __riscv_vfclass((vx), (vlen)); \ + VBOOL Inf_or_NaN_or_pm0; \ + IDENTIFY(vclass, class_NaN | class_Inf | class_Zero, Inf_or_NaN_or_pm0, \ + (vlen)); \ + VUINT expo_x = __riscv_vsrl(F_AS_U(vx), MAN_LEN, (vlen)); \ + expo_x = __riscv_vand(expo_x, 0x7FF, (vlen)); \ + VBOOL x_small = __riscv_vmsltu(expo_x, EXP_BIAS - 30, (vlen)); \ + (special_args) = __riscv_vmor(Inf_or_NaN_or_pm0, x_small, (vlen)); \ + if (__riscv_vcpop((special_args), (vlen)) > 0) { \ + VFLOAT tmp = __riscv_vfmadd(x_small, (vx), -0x1.0p-60, (vx), (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), tmp, x_small, (vlen)); \ + tmp = __riscv_vfadd(Inf_or_NaN_or_pm0, (vx), (vx), (vlen)); \ + (vy_special) = \ + __riscv_vmerge((vy_special), tmp, Inf_or_NaN_or_pm0, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_posZero, (special_args), (vlen)); \ + } \ + } while (0) + +#define EXCEPTION_HANDLING_ATANH(vx, special_args, vy_special, 
vlen) \ + do { \ + VUINT expo_x = __riscv_vand(__riscv_vsrl(F_AS_U(vx), MAN_LEN, (vlen)), \ + 0x7FF, (vlen)); \ + VBOOL x_large = __riscv_vmsgeu(expo_x, EXP_BIAS, (vlen)); \ + VBOOL x_small = __riscv_vmsltu(expo_x, EXP_BIAS - 30, (vlen)); \ + (special_args) = __riscv_vmor(x_large, x_small, (vlen)); \ + if (__riscv_vcpop((special_args), (vlen)) > 0) { \ + VFLOAT abs_x = __riscv_vfsgnj((vx), fp_posOne, (vlen)); \ + VBOOL x_gt_1 = __riscv_vmfgt(abs_x, fp_posOne, (vlen)); \ + VBOOL x_eq_1 = __riscv_vmfeq(abs_x, fp_posOne, (vlen)); \ + /* substitute |x| > 1 with sNaN */ \ + (vx) = __riscv_vfmerge((vx), fp_sNaN, x_gt_1, (vlen)); \ + /* substitute |x| = 1 with +/-Inf and generate div-by-zero signal */ \ + VFLOAT tmp = VFMV_VF(fp_posZero, (vlen)); \ + tmp = __riscv_vfsgnj(tmp, (vx), (vlen)); \ + tmp = __riscv_vfrec7(x_eq_1, tmp, (vlen)); \ + (vy_special) = __riscv_vfadd((special_args), (vx), (vx), (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), tmp, x_eq_1, (vlen)); \ + tmp = __riscv_vfmadd(x_small, (vx), 0x1.0p-60, (vx), (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), tmp, x_small, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_posZero, (special_args), (vlen)); \ + } \ + } while (0) + +// scale x down by 2^(-550) and set u to 0 if x >= 2^500 +#define SCALE_X(vx, n, u, vlen) \ + do { \ + VUINT expo_x = __riscv_vsrl(F_AS_U((vx)), MAN_LEN, (vlen)); \ + VBOOL x_large = __riscv_vmsgeu(expo_x, EXP_BIAS + 500, (vlen)); \ + (n) = __riscv_vxor((n), (n), (vlen)); \ + (n) = __riscv_vmerge((n), 550, x_large, (vlen)); \ + (u) = VFMV_VF(PLUS_MINUS_ONE, (vlen)); \ + (u) = __riscv_vfmerge((u), fp_posZero, x_large, (vlen)); \ + (vx) = I_AS_F( \ + __riscv_vsub(F_AS_I(vx), __riscv_vsll((n), MAN_LEN, (vlen)), (vlen))); \ + } while (0) + +// 2^(-50) <= X <= 2^500, u is -1 or 0 +// If u is -1, 1 <= X < 2^500 +#define XSQ_PLUS_U_ACOSH(vx, u, A, a, vlen) \ + do { \ + VFLOAT P, p; \ + PROD_X1Y1((vx), (vx), P, p, (vlen)); \ + VFLOAT tmp1, tmp2; \ + FAST2SUM(P, (u), tmp1, tmp2, 
(vlen)); \ + tmp2 = __riscv_vfadd(tmp2, p, (vlen)); \ + FAST2SUM(tmp1, tmp2, (A), (a), (vlen)); \ + } while (0) + +#define XSQ_PLUS_U_ASINH(vx, u, A, a, vlen) \ + do { \ + VFLOAT P, p; \ + PROD_X1Y1((vx), (vx), P, p, (vlen)); \ + VFLOAT tmp1, tmp2; \ + POS2SUM(P, (u), tmp1, tmp2, (vlen)); \ + tmp2 = __riscv_vfadd(tmp2, p, (vlen)); \ + POS2SUM(tmp1, tmp2, (A), (a), (vlen)); \ + } while (0) + +// scale x down by 2^(-550) and set u to 0 if x >= 2^500 +#define SCALE_4_LOG(S, s, n, vlen) \ + do { \ + VINT expo_x = __riscv_vsra(F_AS_I((S)), MAN_LEN - 8, (vlen)); \ + expo_x = __riscv_vadd(expo_x, 0x96, (vlen)); \ + expo_x = __riscv_vsra(expo_x, 8, (vlen)); \ + VINT n_adjust = __riscv_vsub(expo_x, EXP_BIAS, (vlen)); \ + (n) = __riscv_vadd((n), n_adjust, (vlen)); \ + expo_x = __riscv_vsll(__riscv_vrsub(expo_x, 2 * EXP_BIAS, (vlen)), \ + MAN_LEN, (vlen)); \ + (S) = I_AS_F(__riscv_vsub( \ + F_AS_I((S)), __riscv_vsll(n_adjust, MAN_LEN, (vlen)), (vlen))); \ + (s) = __riscv_vfmul((s), I_AS_F(expo_x), (vlen)); \ + } while (0) + +#define TRANSFORM_2_ATANH(S, s, numer, delta_numer, denom, delta_denom, vlen) \ + do { \ + VFLOAT S_tmp = __riscv_vfsub((S), fp_posOne, (vlen)); \ + FAST2SUM(S_tmp, (s), (numer), (delta_numer), (vlen)); \ + (numer) = __riscv_vfadd((numer), (numer), (vlen)); \ + (delta_numer) = __riscv_vfadd((delta_numer), (delta_numer), (vlen)); \ + S_tmp = VFMV_VF(fp_posOne, (vlen)); \ + FAST2SUM(S_tmp, (S), (denom), (delta_denom), (vlen)); \ + (delta_denom) = __riscv_vfadd((delta_denom), (s), (vlen)); \ + } while (0) + +#define LOG_POLY(r, r_lo, poly, vlen) \ + do { \ + VFLOAT rsq = __riscv_vfmul((r), (r), (vlen)); \ + VFLOAT rcube = __riscv_vfmul(rsq, (r), (vlen)); \ + VFLOAT r6 = __riscv_vfmul(rcube, rcube, (vlen)); \ + VFLOAT poly_right = PSTEP(0x1.c71c543983a27p-12, rsq, \ + PSTEP(0x1.7465c27ee47d0p-14, rsq, \ + PSTEP(0x1.39af2e90a6554p-16, \ + 0x1.2e74f2255e096p-18, rsq, (vlen)), \ + (vlen)), \ + (vlen)); \ + VFLOAT poly_left = \ + PSTEP(0x1.555555555558cp-4, rsq, 
\ + PSTEP(0x1.9999999982550p-7, 0x1.2492493f7cc71p-9, rsq, (vlen)), \ + (vlen)); \ + (poly) = __riscv_vfmadd(poly_right, r6, poly_left, (vlen)); \ + (poly) = __riscv_vfmadd(poly, rcube, (r_lo), (vlen)); \ + } while (0) diff --git a/src/rvvlm_acoshD.c b/src/rvvlm_acoshD.c new file mode 100644 index 0000000..0a1f4a4 --- /dev/null +++ b/src/rvvlm_acoshD.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE UNIT_STRIDE + +#include RVVLM_ACOSHD_VSET_CONFIG + +#define COMPILE_FOR_ACOSH + +#include "rvvlm_asinhcoshD.inc.h" diff --git a/src/rvvlm_acoshDI.c b/src/rvvlm_acoshDI.c new file mode 100644 index 0000000..7685f54 --- /dev/null +++ b/src/rvvlm_acoshDI.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE GENERAL_STRIDE + +#include RVVLM_ACOSHDI_VSET_CONFIG + +#define COMPILE_FOR_ACOSH + +#include "rvvlm_asinhcoshD.inc.h" diff --git a/src/rvvlm_asinhD.c b/src/rvvlm_asinhD.c new file mode 100644 index 0000000..8665eb2 --- /dev/null +++ b/src/rvvlm_asinhD.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE UNIT_STRIDE + +#include RVVLM_ASINHD_VSET_CONFIG + +#define COMPILE_FOR_ASINH + +#include "rvvlm_asinhcoshD.inc.h" diff --git a/src/rvvlm_asinhDI.c b/src/rvvlm_asinhDI.c new file mode 100644 index 0000000..bcc76b0 --- /dev/null +++ b/src/rvvlm_asinhDI.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. 
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdint.h>
+
+#include "rvvlm.h"
+#define API_SIGNATURE API_SIGNATURE_11
+#define STRIDE GENERAL_STRIDE
+
+#include RVVLM_ASINHDI_VSET_CONFIG
+
+#define COMPILE_FOR_ASINH
+
+#include "rvvlm_asinhcoshD.inc.h"
diff --git a/src/rvvlm_atanhD.c b/src/rvvlm_atanhD.c
new file mode 100644
index 0000000..837eea9
--- /dev/null
+++ b/src/rvvlm_atanhD.c
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdint.h>
+
+#include "rvvlm.h"
+#define API_SIGNATURE API_SIGNATURE_11
+#define STRIDE UNIT_STRIDE
+
+#include RVVLM_ATANHD_VSET_CONFIG
+
+#include "rvvlm_atanhD.inc.h"
diff --git a/src/rvvlm_atanhDI.c b/src/rvvlm_atanhDI.c
new file mode 100644
index 0000000..08789c3
--- /dev/null
+++ b/src/rvvlm_atanhDI.c
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdint.h>
+
+#include "rvvlm.h"
+#define API_SIGNATURE API_SIGNATURE_11
+#define STRIDE GENERAL_STRIDE
+
+#include RVVLM_ATANHDI_VSET_CONFIG
+
+#include "rvvlm_atanhD.inc.h"
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7163286..4d6964b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -70,14 +70,20 @@ set(TEST_SOURCES
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atan2I.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atan2pi.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atan2piI.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_acosh.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_acoshI.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_asinh.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_asinhI.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atanh.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_atanhI.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expI.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp2.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp2I.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10I.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1I.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_exp10I.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1.cpp
+# ${CMAKE_CURRENT_SOURCE_DIR}/src/test_expm1I.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_log.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_logI.cpp
 # ${CMAKE_CURRENT_SOURCE_DIR}/src/test_log2.cpp
diff --git a/test/src/test_acosh.cpp b/test/src/test_acosh.cpp
new file mode 100644
index 0000000..6f833d4
--- /dev/null
+++ b/test/src/test_acosh.cpp
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("acosh: current chosen algorithm; reduced argument in FP64 only")
+
+  show_special_fp64(rvvlm_acosh, "Special Value handling of this function");
+
+  x_start = 0x1.0p0 + 0x1.0p-52;
+  x_end = 0x1.0p0 + 0x1.0p-10;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p0;
+  x_end = 0x1.0p2;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p490;
+  x_end = 0x1.0p520;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p1020;
+  x_end = 0x1.FFFFFFFFFFp1023;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_acosh, acoshl, x_start, x_end, nb_tests);
+
+  return 0;
+}
diff --git a/test/src/test_acoshI.cpp b/test/src/test_acoshI.cpp
new file mode 100644
index 0000000..755e405
--- /dev/null
+++ b/test/src/test_acoshI.cpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("acoshI: current chosen algorithm; reduced argument in FP64 only")
+
+  x_start = 1.2;
+  x_end = 10.0;
+  nb_tests = 30;
+  int stride_x = 21;
+  int stride_y = 39;
+  report_err_fp64(rvvlm_acoshI, acoshl, x_start, x_end, nb_tests, stride_x,
+                  stride_y);
+
+  return 0;
+}
diff --git a/test/src/test_asinh.cpp b/test/src/test_asinh.cpp
new file mode 100644
index 0000000..be6c6c5
--- /dev/null
+++ b/test/src/test_asinh.cpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("asinh: current chosen algorithm; reduced argument in FP64 only")
+
+  show_special_fp64(rvvlm_asinh, "Special Value handling of this function");
+
+  x_start = 0x1.0p-40;
+  x_end = 0x1.0p-35;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p-35;
+  x_end = -0x1.0p-40;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-20;
+  x_end = 0x1.0p-10;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-6;
+  x_end = 0x1.0p0;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p0;
+  x_end = 0x1.0p2;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p0;
+  x_end = -0x1.0p2;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p490;
+  x_end = 0x1.0p520;
+  nb_tests = 4000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p1020;
+  x_end = 0x1.FFFFFFFFFFp1023;
+  nb_tests = 4000;
+  report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
+
+  return 0;
+}
diff --git a/test/src/test_asinhI.cpp b/test/src/test_asinhI.cpp
new file mode 100644
index 0000000..fd8b2e2
--- /dev/null
+++ b/test/src/test_asinhI.cpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("asinhI: current chosen algorithm; reduced argument in FP64 only")
+
+  x_start = 1.2;
+  x_end = 10.0;
+  nb_tests = 30;
+  int stride_x = 21;
+  int stride_y = 39;
+  report_err_fp64(rvvlm_asinhI, asinhl, x_start, x_end, nb_tests, stride_x,
+                  stride_y);
+
+  return 0;
+}
diff --git a/test/src/test_atanh.cpp b/test/src/test_atanh.cpp
new file mode 100644
index 0000000..09ddbc7
--- /dev/null
+++ b/test/src/test_atanh.cpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("atanh: current chosen algorithm; reduced argument in FP64 only")
+
+  // show_special_fp64(rvvlm_atanh, "Special Value handling of this function");
+
+  x_start = 0x1.0p-40;
+  x_end = 0x1.0p-35;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p-35;
+  x_end = -0x1.0p-40;
+  nb_tests = 40000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-20;
+  x_end = 0x1.0p-10;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-10;
+  x_end = 0x1.ffp-3;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.ffp-3;
+  x_end = 0x1.0p-2;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = 0x1.0p-1;
+  x_end = 0x1.0p0 - 0x1.0p-51;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = -0x1.0p-1;
+  x_end = -(0x1.0p0 - 0x1.0p-51);
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  x_start = -1.0 + 0x1.0p-53;
+  x_end = 1.0 - 0x1.0p-51;
+  nb_tests = 400000;
+  report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests);
+
+  return 0;
+}
diff --git a/test/src/test_atanhI.cpp b/test/src/test_atanhI.cpp
new file mode 100644
index 0000000..8118dc5
--- /dev/null
+++ b/test/src/test_atanhI.cpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: 2023 Rivos Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+#include <stdio.h>
+
+#include "rvvlm.h"
+#include "test_infra.h"
+
+#define COMMENT(comment)                                                       \
+  { printf("\n=====\t" comment "\n"); }
+
+int main() {
+  unsigned long nb_tests;
+  double x_start, x_end;
+
+  COMMENT("atanhI: current chosen algorithm; reduced argument in FP64 only")
+
+  x_start = 0.1;
+  x_end = 0.9;
+  nb_tests = 30;
+  int stride_x = 21;
+  int stride_y = 39;
+  report_err_fp64(rvvlm_atanhI, atanhl, x_start, x_end, nb_tests, stride_x,
+                  stride_y);
+
+  return 0;
+}