From ca948ac683950bbe53bf5194c92e26c553692340 Mon Sep 17 00:00:00 2001 From: PingTakPeterTang Date: Thu, 28 Mar 2024 17:37:17 -0700 Subject: [PATCH] add the FP64 true gamma function tgamma --- CMakeLists.txt | 2 + include/rvvlm.h | 23 ++ include/rvvlm_gammafuncsD.h | 27 +++ include/rvvlm_tgammaD.inc.h | 446 +++++++++++++++++++++++++++++++++++ src/rvvlm_tgammaD.c | 16 ++ src/rvvlm_tgammaDI.c | 16 ++ test/CMakeLists.txt | 2 + test/include/test_infra.h | 5 + test/src/test_acos.cpp | 8 +- test/src/test_acospi.cpp | 8 +- test/src/test_asinh.cpp | 16 +- test/src/test_atan2.cpp | 24 +- test/src/test_atan2pi.cpp | 48 ++-- test/src/test_atanh.cpp | 16 +- test/src/test_cdfnorm.cpp | 12 +- test/src/test_cdfnorminv.cpp | 16 +- test/src/test_erf.cpp | 10 +- test/src/test_erfcinv.cpp | 16 +- test/src/test_erfinv.cpp | 28 +-- test/src/test_exp10.cpp | 12 +- test/src/test_exp2.cpp | 8 +- test/src/test_expm1.cpp | 14 +- test/src/test_infra.cpp | 37 +++ test/src/test_log.cpp | 10 +- test/src/test_log2.cpp | 10 +- test/src/test_pow.cpp | 12 +- test/src/test_tan.cpp | 10 +- test/src/test_tgamma.cpp | 83 +++++++ test/src/test_tgammaI.cpp | 24 ++ 29 files changed, 816 insertions(+), 143 deletions(-) create mode 100644 include/rvvlm_gammafuncsD.h create mode 100644 include/rvvlm_tgammaD.inc.h create mode 100644 src/rvvlm_tgammaD.c create mode 100644 src/rvvlm_tgammaDI.c create mode 100644 test/src/test_tgamma.cpp create mode 100644 test/src/test_tgammaI.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b29a7f..a71de7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,8 @@ set(PROJECT_SOURCES src/rvvlm_sinhDI.c src/rvvlm_tanhD.c src/rvvlm_tanhDI.c + src/rvvlm_tgammaD.c + src/rvvlm_tgammaDI.c ) add_library(vecm diff --git a/include/rvvlm.h b/include/rvvlm.h index 7e00870..3eb6ed8 100644 --- a/include/rvvlm.h +++ b/include/rvvlm.h @@ -172,6 +172,17 @@ union sui64_fp64 { (delta_Q) = __riscv_vfmul(_q, __riscv_vfrec7((denom), (vlen)), (vlen)); \ } while (0) +#define ACC_DIV2_N2D2(numer, delta_n, denom, delta_d, Q, delta_Q, vlen) \ + do { \ + VFLOAT _recip, _q; \ + _recip = __riscv_vfrdiv((denom), 0x1.0p0, (vlen)); \ + (Q) = __riscv_vfmul((numer), _recip, (vlen)); \ + _q = __riscv_vfnmsub((Q), (denom), (numer), (vlen)); \ + _q = __riscv_vfnmsac(_q, (Q), (delta_d), (vlen)); \ + _q = __riscv_vfadd(_q, (delta_n), (vlen)); \ + (delta_Q) = __riscv_vfmul(_q, _recip, (vlen)); \ + } while (0) + #define SQRT2_X2(x, delta_x, r, delta_r, vlen) \ do { \ VFLOAT xx = __riscv_vfadd((x), (delta_x), (vlen)); \ @@ -469,6 +480,13 @@ union sui64_fp64 { #define RVVLM_TANPIDI_VSET_CONFIG "rvvlm_fp64m2.h" #define RVVLM_TANPIDI_MERGED rvvlm_tanpiI +// FP64 tgamma function configuration +#define RVVLM_TGAMMAD_VSET_CONFIG "rvvlm_fp64m1.h" +#define RVVLM_TGAMMAD_STD rvvlm_tgamma + +#define RVVLM_TGAMMADI_VSET_CONFIG "rvvlm_fp64m1.h" +#define RVVLM_TGAMMADI_STD rvvlm_tgammaI + // FP64 cosh function configuration #define RVVLM_COSHD_VSET_CONFIG "rvvlm_fp64m2.h" #define RVVLM_COSHD_STD rvvlm_coshD_std @@ -499,6 +517,7 @@ extern int64_t expD_tbl64_fixedpt[64]; extern int64_t logD_tbl128_fixedpt[128]; extern double logtbl_4_powD_128_hi_lo[256]; extern double dbl_2ovpi_tbl[28]; +extern int64_t factorial_fixedpt[180]; // Define the functions in the vector math library void RVVLM_ACOSD_FIXEDPT(size_t x_len, const double *x, double *y); @@ -703,6 +722,10 @@ void RVVLM_TANHD_STD(size_t x_len, const double *x, double *y); void RVVLM_TANHDI_STD(size_t x_len, const double *x, size_t stride_x, double *y, size_t stride_y); +void RVVLM_TGAMMAD_STD(size_t x_len, const double *x, double *y); +void RVVLM_TGAMMADI_STD(size_t x_len, const double *x, size_t stride_x, + double *y, size_t stride_y); + #ifdef __cplusplus } #endif diff --git a/include/rvvlm_gammafuncsD.h b/include/rvvlm_gammafuncsD.h new file mode 100644 index 0000000..80ccbcf --- /dev/null +++ b/include/rvvlm_gammafuncsD.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +// gamma(+inf) = +inf; gamma(-inf/sNaN) is qNaN with invalid +// gamma(qNaN) is qNaN +// gamma(+-0) is +-inf and divide by 0 +// gamma(tiny) is 1/tiny +#define EXCEPTION_HANDLING_TGAMMA(vx, special_args, vy_special, vlen) \ + do { \ + VUINT expo_x = __riscv_vand(__riscv_vsrl(F_AS_U((vx)), MAN_LEN, (vlen)), \ + 0x7FF, (vlen)); \ + VBOOL x_small = __riscv_vmsltu(expo_x, EXP_BIAS - 60, (vlen)); \ + VBOOL x_InfNaN = __riscv_vmseq(expo_x, 0x7FF, (vlen)); \ + (special_args) = __riscv_vmor(x_small, x_InfNaN, (vlen)); \ + if (__riscv_vcpop((special_args), (vlen)) > 0) { \ + VUINT vclass = __riscv_vfclass((vx), (vlen)); \ + VBOOL x_negInf; \ + IDENTIFY(vclass, class_negInf, x_negInf, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_sNaN, x_negInf, (vlen)); \ + VFLOAT y_tmp = __riscv_vfadd(x_InfNaN, (vx), (vx), (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), y_tmp, x_InfNaN, (vlen)); \ + y_tmp = __riscv_vfrdiv(x_small, (vx), fp_posOne, (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), y_tmp, x_small, (vlen)); \ + (vx) = __riscv_vfmerge((vx), fp_posOne, (special_args), (vlen)); \ + } \ + } while (0) diff --git a/include/rvvlm_tgammaD.inc.h b/include/rvvlm_tgammaD.inc.h new file mode 100644 index 0000000..acecb82 --- /dev/null +++ b/include/rvvlm_tgammaD.inc.h @@ -0,0 +1,446 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#if (STRIDE == UNIT_STRIDE) +#define F_VER1 RVVLM_TGAMMAD_STD +#else +#define F_VER1 RVVLM_TGAMMADI_STD +#endif + +//---Approximate log(x) by w + w^3 poly(w^2) +// w = 2(x-1)/(x+1), x roughly in [1/rt(2), rt(2)] +#define P_log_0 0x5555555555555090 // Q_66 +#define P_log_1 0x666666666686863a // Q_69 +#define P_log_2 0x49249248fc99ba4b // Q_71 +#define P_log_3 0x71c71ca402e164fa // Q_74 +#define P_log_4 0x5d1733e3ae94dde0 // Q_76 +#define P_log_5 0x4ec8b69784234032 // Q_78 +#define P_log_6 0x43cc44a056dc3c93 // Q_80 +#define P_log_7 0x4432439bb76e7d74 // Q_82 + +#define LOG2_HI 0x1.62e42fefa4000p-1 +#define LOG2_LO -0x1.8432a1b0e2634p-43 + +//---Approximate exp(R) by 1 + R + R^2*poly(R) +#define P_exp_0 0x400000000000004e // Q_63 +#define P_exp_1 0x1555555555555b6e // Q_63 +#define P_exp_2 0x555555555553378 // Q_63 +#define P_exp_3 0x1111111110ec10d // Q_63 +#define P_exp_4 0x2d82d82d87a9b5 // Q_63 +#define P_exp_5 0x6806806ce6d6f // Q_63 +#define P_exp_6 0xd00d00841fcf // Q_63 +#define P_exp_7 0x171ddefda54b // Q_63 +#define P_exp_8 0x24fcc01d627 // Q_63 +#define P_exp_9 0x35ed8bbd24 // Q_63 +#define P_exp_10 0x477745b6c // Q_63 + +//---Approximate Stirling correction by P(t)/Q(t) +// Gamma(x) = (x/e)^(x-1/2) * P(t)/Q(t), t = 1/x, x in [2, 180] +#define P_corr_0 0x599ecf7a9368327 // Q_78 +#define P_corr_1 0x120a4be8e3d8673d // Q_78 +#define P_corr_2 0x2ab73aec63e90213 // Q_78 +#define P_corr_3 0x32f903e18454e088 // Q_78 +#define P_corr_4 0x29f463d533d0a4b5 // Q_78 +#define P_corr_5 0x1212989fdf61f6c1 // Q_78 +#define P_corr_6 0x48706d4f75a0491 // Q_78 +#define P_corr_7 0x5591439d2d51a6 // Q_78 + +#define Q_corr_0 0x75e5053ce715a76 // Q_79 +#define Q_corr_1 0x171e2068d3ef7453 // Q_79 +#define Q_corr_2 0x363d736690f2373f // Q_79 +#define Q_corr_3 0x3e793a1cc19bbc32 // Q_79 +#define Q_corr_4 0x31dc2fbf92ec978c // Q_79 +#define Q_corr_5 0x138c2244d1c1e0b1 // Q_79 +#define Q_corr_6 0x450a7392d81c20f // Q_79 +#define Q_corr_7 0x1ed9c605221435 // Q_79 + +//---Approximate sin(pi x)/pi as x + x^3 poly(x^2) +#define P_sin_0 -0x694699894c1f4ae7 // Q_62 +#define P_sin_1 0x33f396805788034f // Q_62 +#define P_sin_2 -0xc3547239048c220 // Q_62 +#define P_sin_3 0x1ac6805cc1cecf4 // Q_62 +#define P_sin_4 -0x26702d2fd5a3e6 // Q_62 +#define P_sin_5 0x26e8d360232c6 // Q_62 +#define P_sin_6 -0x1d3e4d9787ba // Q_62 +#define P_sin_7 0x107298fc107 // Q_62 + +//---Compute log(x/e) to 2^(-65) absolute accuracy +// for Stirlings formula (x/e)^x sqrt(2pi/x) = (x/e)^(x-1/2) sqrt(2pi/e) +#define TGAMMA_LOG(x_hi, x_lo, y_hi, y_lo, vlen) \ + do { \ + VFLOAT x_in_hi = (x_hi); \ + VFLOAT x_in_lo = (x_lo); \ + VINT n = __riscv_vadd(__riscv_vsra(F_AS_I(x_in_hi), MAN_LEN - 8, (vlen)), \ + 0x96, vlen); \ + n = __riscv_vsub(__riscv_vsra(n, 8, vlen), EXP_BIAS, vlen); \ + VFLOAT scale = I_AS_F( \ + __riscv_vsll(__riscv_vrsub(n, EXP_BIAS, (vlen)), MAN_LEN, (vlen))); \ + x_in_hi = __riscv_vfmul(x_in_hi, scale, (vlen)); \ + x_in_lo = __riscv_vfmul(x_in_lo, scale, (vlen)); \ + /* x is scaled, and log(x) is 2 atanh(w/2); w = 2(x-1)/(x+1) */ \ + \ + VFLOAT numer, denom, denom_delta; \ + numer = __riscv_vfsub(x_in_hi, fp_posOne, (vlen)); /* exact */ \ + denom = __riscv_vfadd(x_in_hi, fp_posOne, (vlen)); \ + denom_delta = __riscv_vfadd(__riscv_vfrsub(denom, fp_posOne, (vlen)), \ + x_in_hi, (vlen)); \ + denom_delta = __riscv_vfadd(denom_delta, x_in_lo, (vlen)); \ + VFLOAT w_hi, w_lo; \ + ACC_DIV2_N2D2(numer, x_in_lo, denom, denom_delta, w_hi, w_lo, vlen); \ + /* w_hi + w_lo is at this point (x-1)/(x+1) */ \ + /* Next get 2(x-1)/(x+1) in Q64 fixed point */ \ + VINT W = __riscv_vfcvt_x(__riscv_vfmul(w_hi, 0x1.0p65, (vlen)), (vlen)); \ + W = __riscv_vadd( \ + W, __riscv_vfcvt_x(__riscv_vfmul(w_lo, 0x1.0p65, (vlen)), (vlen)), \ + (vlen)); \ + /* W is in Q64 because W is 2(x-1)/(x+1) */ \ + \ + VFLOAT n_flt = __riscv_vfcvt_f(n, (vlen)); \ + VINT W2 = __riscv_vsmul(W, W, 1, (vlen)); /* Q65 */ \ + \ + VINT P_right, P_left, W8; \ + P_right = PSTEP_I_SRA(P_log_6, P_log_7, 4, W2, (vlen)); \ + P_right = PSTEP_I_SRA(P_log_5, W2, 4, P_right, (vlen)); \ + P_right = PSTEP_I_SRA(P_log_4, W2, 4, P_right, (vlen)); \ + /* P_right in Q76 */ \ + P_left = PSTEP_I_SRA(P_log_2, P_log_3, 5, W2, (vlen)); \ + P_left = PSTEP_I_SRA(P_log_1, W2, 4, P_left, (vlen)); \ + P_left = PSTEP_I_SRA(P_log_0, W2, 5, P_left, (vlen)); \ + /* P_left in Q66 */ \ + W8 = __riscv_vsmul(W2, W2, 1, (vlen)); /* Q67 */ \ + W8 = __riscv_vsmul(W8, W8, 1, (vlen)); /* Q71 */ \ + P_right = __riscv_vsmul(P_right, W8, 1, (vlen)); /* Q84 */ \ + P_right = __riscv_vsra(P_right, 18, (vlen)); /* Q66 */ \ + P_left = __riscv_vadd(P_left, P_right, (vlen)); /* Q66 */ \ + \ + VINT W3 = __riscv_vsmul(W2, W, 1, (vlen)); /* Q66 */ \ + P_left = __riscv_vsmul(P_left, W3, 1, (vlen)); /* Q69 */ \ + VFLOAT poly_hi = __riscv_vfcvt_f(P_left, (vlen)); \ + P_left = __riscv_vsub(P_left, __riscv_vfcvt_x(poly_hi, (vlen)), (vlen)); \ + VFLOAT poly_lo = __riscv_vfcvt_f(P_left, (vlen)); \ + poly_hi = __riscv_vfmul(poly_hi, 0x1.0p-69, (vlen)); \ + poly_lo = __riscv_vfmul(poly_lo, 0x1.0p-69, (vlen)); \ + \ + /* n*log(2) - 1 + w + poly is the desired result */ \ + VFLOAT A, B; \ + A = __riscv_vfmul(n_flt, LOG2_HI, (vlen)); /* exact */ \ + A = __riscv_vfsub(A, fp_posOne, (vlen)); /* exact due to A's range */ \ + w_hi = __riscv_vfadd(w_hi, w_hi, (vlen)); \ + w_lo = __riscv_vfadd(w_lo, w_lo, (vlen)); \ + FAST2SUM(A, w_hi, B, (y_lo), (vlen)); \ + w_lo = __riscv_vfadd((y_lo), w_lo, (vlen)); \ + w_lo = __riscv_vfmacc(w_lo, LOG2_LO, n_flt, (vlen)); \ + poly_lo = __riscv_vfadd(poly_lo, w_lo, (vlen)); \ + FAST2SUM(B, poly_hi, (y_hi), (y_lo), (vlen)); \ + (y_lo) = __riscv_vfadd((y_lo), poly_lo, (vlen)); \ + } while (0) + +//---Compute exp for Stirlings formula used in tgamma +// computes exp(x_hi + x_lo) as 2^n * EXP, EXP is fixed-point Q62 +#define TGAMMA_EXP(x_hi, x_lo, n, EXP, vlen) \ + do { \ + VFLOAT n_flt = __riscv_vfmul((x_hi), 0x1.71547652b82fep+0, (vlen)); \ + (n) = __riscv_vfcvt_x(n_flt, (vlen)); \ + n_flt = __riscv_vfcvt_f((n), (vlen)); \ + VFLOAT r_hi = __riscv_vfnmsub(n_flt, LOG2_HI, (x_hi), (vlen)); \ + VFLOAT r_lo = __riscv_vfnmsub(n_flt, LOG2_LO, (x_lo), (vlen)); \ + r_hi = __riscv_vfmul(r_hi, 0x1.0p63, (vlen)); \ + r_lo = __riscv_vfmul(r_lo, 0x1.0p63, (vlen)); \ + VINT R = __riscv_vfcvt_x(r_hi, (vlen)); \ + R = __riscv_vadd(R, __riscv_vfcvt_x(r_lo, (vlen)), (vlen)); \ + /* R is reduced argument in Q63 */ \ + \ + VINT P_right = \ + PSTEP_I(P_exp_5, R, \ + PSTEP_I(P_exp_6, R, \ + PSTEP_I(P_exp_7, R, \ + PSTEP_I(P_exp_8, R, \ + PSTEP_I(P_exp_9, P_exp_10, R, (vlen)), \ + (vlen)), \ + (vlen)), \ + (vlen)), \ + (vlen)); \ + VINT R_sq = __riscv_vsmul(R, R, 1, (vlen)); \ + VINT R_to_5 = __riscv_vsmul(R_sq, R_sq, 1, (vlen)); \ + R_to_5 = __riscv_vsmul(R_sq, R_sq, 1, (vlen)); \ + R_to_5 = __riscv_vsmul(R_to_5, R, 1, (vlen)); \ + VINT P_left = \ + PSTEP_I(P_exp_0, R, \ + PSTEP_I(P_exp_1, R, \ + PSTEP_I(P_exp_2, R, \ + PSTEP_I(P_exp_3, P_exp_4, R, (vlen)), (vlen)), \ + (vlen)), \ + (vlen)); \ + P_right = __riscv_vsmul(P_right, R_to_5, 1, (vlen)); \ + P_left = __riscv_vadd(P_right, P_left, (vlen)); \ + P_left = __riscv_vsmul(P_left, R_sq, 1, (vlen)); \ + P_left = __riscv_vadd(P_left, R, (vlen)); \ + (EXP) = __riscv_vsra(P_left, 1, (vlen)); \ + INT ONE = (1LL) << 62; \ + (EXP) = __riscv_vadd((EXP), ONE, (vlen)); \ + } while (0) + +// Compute the term (x/e)^(x-1/2) for 2 <= x <= 180. +// Return integer n and Q62 fixed point EXP, 2^n value_of(EXP) is (x/e)^(x-1/2) +#define STIRLING_POWER(x_hi, x_lo, n, EXP, vlen) \ + do { \ + VFLOAT y_hi, y_lo; \ + TGAMMA_LOG((x_hi), (x_lo), y_hi, y_lo, (vlen)); \ + VFLOAT x_m_half = __riscv_vfsub((x_hi), 0x1.0p-1, (vlen)); \ + /* compute (x_m_half, x_lo) * (y_hi, y_lo) */ \ + VFLOAT z_hi, z_lo; \ + PROD_X1Y1(x_m_half, y_hi, z_hi, z_lo, (vlen)); \ + z_lo = __riscv_vfmacc(z_lo, x_m_half, y_lo, (vlen)); \ + z_lo = __riscv_vfmacc(z_lo, (x_lo), y_hi, (vlen)); \ + TGAMMA_EXP(z_hi, z_lo, (n), (EXP), (vlen)); \ + } while (0) + +// Gamma based on Stirling formula is Gamma(x) ~ (x/e)^x sqrt(2 pi / x) +// poly(1/x) To incoporate the 1/sqrt(x) into the power calculation +// (x/e)^(x-1/2) sqrt(2 pi / e ) poly(1/x) +// This poly(1/x) is in essense a correction term +#define STIRLING_CORRECTION(x_hi, x_lo, P_SC, Q_SC, vlen) \ + do { \ + /* 2 <= x < 180. Use Q62 to represent 1/x in fixed point */ \ + VFLOAT y_hi = __riscv_vfrdiv((x_hi), fp_posOne, (vlen)); \ + VFLOAT y_lo = VFMV_VF(fp_posOne, (vlen)); \ + y_lo = __riscv_vfnmsub((x_hi), y_hi, y_lo, (vlen)); \ + y_lo = __riscv_vfnmsac(y_lo, (x_lo), y_hi, (vlen)); \ + y_lo = __riscv_vfmul(y_hi, y_lo, (vlen)); \ + y_hi = __riscv_vfmul(y_hi, 0x1.0p62, (vlen)); \ + y_lo = __riscv_vfmul(y_lo, 0x1.0p62, (vlen)); \ + VINT R = __riscv_vfcvt_x(y_hi, (vlen)); \ + R = __riscv_vadd(R, __riscv_vfcvt_x(y_lo, (vlen)), (vlen)); \ + /* R is 1/(x_hi+x_lo) in Q62 */ \ + (P_SC) = PSTEP_I_SLL(P_corr_6, P_corr_7, 1, R, (vlen)); \ + (P_SC) = PSTEP_I_SLL(P_corr_5, R, 1, (P_SC), (vlen)); \ + (P_SC) = PSTEP_I_SLL(P_corr_4, R, 1, (P_SC), (vlen)); \ + (P_SC) = PSTEP_I_SLL(P_corr_3, R, 1, (P_SC), (vlen)); \ + (P_SC) = PSTEP_I_SLL(P_corr_2, R, 1, (P_SC), (vlen)); \ + (P_SC) = PSTEP_I_SLL(P_corr_1, R, 1, (P_SC), (vlen)); \ + (P_SC) = PSTEP_I_SLL(P_corr_0, R, 1, (P_SC), (vlen)); \ + \ + (Q_SC) = PSTEP_I_SLL(Q_corr_6, Q_corr_7, 1, R, (vlen)); \ + (Q_SC) = PSTEP_I_SLL(Q_corr_5, R, 1, (Q_SC), (vlen)); \ + (Q_SC) = PSTEP_I_SLL(Q_corr_4, R, 1, (Q_SC), (vlen)); \ + (Q_SC) = PSTEP_I_SLL(Q_corr_3, R, 1, (Q_SC), (vlen)); \ + (Q_SC) = PSTEP_I_SLL(Q_corr_2, R, 1, (Q_SC), (vlen)); \ + (Q_SC) = PSTEP_I_SLL(Q_corr_1, R, 1, (Q_SC), (vlen)); \ + (Q_SC) = PSTEP_I_SLL(Q_corr_0, R, 1, (Q_SC), (vlen)); \ + } while (0) + +// When input x to gamma(x) is negative, a factor of sin(pi x)/pi +// is needed. When x is an exact negative integer, we need to return +// +-inf as special values and also raise the divide-by-zero signal +// The input to TGAMMA_SIN is actually |x| clipped to [2^(-60), 179.5] +#define TGAMMA_SIN(x, P_SIN, SIN_scale, n, vy_special, special_args, vlen) \ + do { \ + VFLOAT n_flt; \ + (n) = __riscv_vfcvt_x((x), (vlen)); \ + n_flt = __riscv_vfcvt_f((n), (vlen)); \ + VFLOAT r = __riscv_vfsub((x), n_flt, (vlen)); \ + VINT m = __riscv_vsra(F_AS_I(r), MAN_LEN, (vlen)); \ + m = __riscv_vrsub(__riscv_vand(m, 0x7FF, (vlen)), EXP_BIAS, (vlen)); \ + /* r = 2^(-m) * val, val in [1, 2). Note that 1 <= m <= 60 */ \ + VFLOAT scale = I_AS_F(__riscv_vsll(__riscv_vadd(m, EXP_BIAS + 61, (vlen)), \ + MAN_LEN, (vlen))); \ + VINT R = __riscv_vfcvt_x(__riscv_vfmul(r, scale, (vlen)), (vlen)); \ + /* R is fixed point in scale 61+m */ \ + VFLOAT rsq = __riscv_vfmul(r, r, (vlen)); \ + VFLOAT rsq_lo = __riscv_vfmsub(r, r, rsq, (vlen)); \ + VINT Rsq = __riscv_vfcvt_x(__riscv_vfmul(rsq, 0x1.0p63, (vlen)), (vlen)); \ + Rsq = __riscv_vadd( \ + Rsq, __riscv_vfcvt_x(__riscv_vfmul(rsq_lo, 0x1.0p63, (vlen)), (vlen)), \ + (vlen)); \ + VINT P_right = PSTEP_I( \ + P_sin_4, Rsq, \ + PSTEP_I(P_sin_5, Rsq, PSTEP_I(P_sin_6, P_sin_7, Rsq, (vlen)), (vlen)), \ + (vlen)); \ + VINT R8 = __riscv_vsmul(Rsq, Rsq, 1, (vlen)); \ + R8 = __riscv_vsmul(R8, R8, 1, (vlen)); \ + VINT P_left = PSTEP_I( \ + P_sin_0, Rsq, \ + PSTEP_I(P_sin_1, Rsq, PSTEP_I(P_sin_2, P_sin_3, Rsq, (vlen)), (vlen)), \ + (vlen)); \ + P_right = __riscv_vsmul(P_right, R8, 1, (vlen)); \ + P_left = __riscv_vadd(P_left, P_right, (vlen)); \ + P_left = __riscv_vsmul(P_left, Rsq, 1, (vlen)); \ + P_left = __riscv_vsmul(P_left, R, 1, (vlen)); \ + (P_SIN) = __riscv_vadd(R, __riscv_vsll(P_left, 1, (vlen)), (vlen)); \ + (SIN_scale) = __riscv_vadd(m, 61, (vlen)); \ + VBOOL pole = __riscv_vmseq(R, 0, (vlen)); \ + if (__riscv_vcpop(pole, (vlen)) > 0) { \ + VFLOAT pm_inf = __riscv_vfrec7(pole, I_AS_F(R), (vlen)); \ + pm_inf = __riscv_vfsgnjn(pm_inf, I_AS_F(__riscv_vsll((n), 63, (vlen))), \ + (vlen)); \ + (vy_special) = __riscv_vmerge((vy_special), pm_inf, pole, (vlen)); \ + (special_args) = __riscv_vmor((special_args), pole, (vlen)); \ + (P_SIN) = __riscv_vmerge((P_SIN), 0x8000, pole, (vlen)); \ + } \ + } while (0) + +void F_VER1(API) { + size_t vlen; + VFLOAT vx, vx_orig, vy, vy_special; + VBOOL special_args; + + SET_ROUNDTONEAREST; + // stripmining over input arguments + for (; _inarg_n > 0; _inarg_n -= vlen) { + vlen = VSET(_inarg_n); + vx = VFLOAD_INARG1(vlen); + + // Handle Inf and NaN and |vx| < 2^(-60) + EXCEPTION_HANDLING_TGAMMA(vx, special_args, vy_special, vlen); + vx_orig = vx; + + vx = __riscv_vfabs(vx, vlen); + vx = __riscv_vfmin(vx, 0x1.67p+7, vlen); + vx_orig = __riscv_vfsgnj(vx, vx_orig, vlen); + + VFLOAT vx_hi = vx; + VFLOAT vx_lo = VFMV_VF(fp_posZero, vlen); + VBOOL x_lt_0 = __riscv_vmflt(vx_orig, fp_posZero, vlen); + + // VINT P_SIN, SIN_scale, lsb; + // TGAMMA_SIN(vx_orig, P_SIN, SIN_scale, lsb, vy_special, special_args, + // vlen); + + if (__riscv_vcpop(x_lt_0, vlen) > 0) { + // add 1 to argument + VFLOAT a_tmp_hi = __riscv_vfadd(vx_hi, fp_posOne, vlen); + VFLOAT a_tmp_lo = __riscv_vfrsub(a_tmp_hi, fp_posOne, vlen); + a_tmp_lo = __riscv_vfadd(a_tmp_lo, vx_hi, vlen); + vx_hi = __riscv_vmerge(vx_hi, a_tmp_hi, x_lt_0, vlen); + vx_lo = __riscv_vmerge(vx_lo, a_tmp_lo, x_lt_0, vlen); + } + + VFLOAT y_hi, y_lo; + VINT n, EXP; + + VINT XF = __riscv_vsll(VMVI_VX(1, vlen), 61, vlen); + VINT XF_scale = VMVI_VX(61, vlen); + + VBOOL x_lt_2 = __riscv_vmflt(vx_hi, 0x1.0p1, vlen); + + if (__riscv_vcpop(x_lt_2, vlen) > 0) { + // So x = 2^(-m) val, val is in [1, 2) + // Create fixed point X in scale Q(61+m) + VINT m = __riscv_vsra(F_AS_I(vx_hi), MAN_LEN, vlen); + m = __riscv_vrsub(__riscv_vand(m, 0x7FF, vlen), EXP_BIAS, vlen); + // at this point, m >= 0, x = 2^(-m) val, val in [1, 2) + VINT XF_scale_1 = __riscv_vadd(m, 61, vlen); + VINT scale_m = + __riscv_vsll(__riscv_vadd(XF_scale_1, EXP_BIAS, vlen), MAN_LEN, vlen); + VFLOAT x_tmp = __riscv_vfmul(vx_hi, I_AS_F(scale_m), vlen); + VINT X = __riscv_vfcvt_x(x_tmp, vlen); + // X is vx_hi in fixed-point, Q(61+m) + x_tmp = __riscv_vfmul(vx_lo, I_AS_F(scale_m), vlen); + X = __riscv_vadd(X, __riscv_vfcvt_x(x_tmp, vlen), vlen); + // X is (vx_hi + vx_lo) in fixed-point, Q(61+m) + VINT One_plus_X = + __riscv_vadd(XF, __riscv_vsra(X, I_AS_U(m), vlen), vlen); + // One_plus_X is 1+x in Q61 + VFLOAT b = VFMV_VF(fp_posZero, vlen); + + // if 1 <= x < 2, gamma(x) = (1/x) gamma(x+1) + // if 0 < x < 1, gamma(x) = (1/(x(x+1))) gamma(x+2) + VBOOL x_ge_1 = __riscv_vmfge(vx_hi, fp_posOne, vlen); + VBOOL cond = __riscv_vmand(x_lt_2, x_ge_1, vlen); + // cond is 1 <= x < 2 + XF_scale = __riscv_vmerge(XF_scale, XF_scale_1, cond, vlen); + XF = __riscv_vmerge(XF, X, cond, vlen); + b = __riscv_vfmerge(b, fp_posOne, cond, vlen); + // at this point, if input x is between [1, 2), XF is x in scale 61+m + // which is 61 (as m is 0). + + cond = __riscv_vmandn(x_lt_2, x_ge_1, vlen); + // cond is 0 < x < 1 + X = __riscv_vsmul(X, One_plus_X, 1, vlen); + XF_scale_1 = __riscv_vadd(m, 59, vlen); + XF_scale = __riscv_vmerge(XF_scale, XF_scale_1, cond, vlen); + XF = __riscv_vmerge(XF, X, cond, vlen); + b = __riscv_vfmerge(b, 0x1.0p1, cond, vlen); + // at this point, XF is either 1, x, or x(x+1) in fixed point + // scale given in XF_scale which is either 62, 61+m, or 59+m + + // now set (vx_hi, vx_lo) to x + b, b = 0, 1, or 2 + x_tmp = __riscv_vfadd(b, vx_hi, vlen); + VFLOAT x_tmp2 = __riscv_vfsub(b, x_tmp, vlen); + x_tmp2 = __riscv_vfadd(x_tmp2, vx_hi, vlen); + vx_hi = x_tmp; + vx_lo = __riscv_vfadd(vx_lo, x_tmp2, vlen); + } + + STIRLING_POWER(vx_hi, vx_lo, n, EXP, vlen); + /* Stirling factor is 2^n * e, EXP is e in Q62 */ + + VINT P_SC, Q_SC, Numer_tail, Denom_tail; + STIRLING_CORRECTION(vx_hi, vx_lo, P_SC, Q_SC, vlen); + /* correction term is 2 * P_SC / Q_SC, P_SC is Q78, Q_SC is Q79 */ + + /* 2^(n-61) * EXP * P_SC / Q_SC is gamma(x) for x >= 2 */ + VINT P = __riscv_vsmul(EXP, P_SC, 1, vlen); + /* P is Q77 */ + + /* now incoporate XF into Q_SC */ + VINT Q = __riscv_vsmul(XF, Q_SC, 1, vlen); + /* scale of Q is 79 - 63 + XF_scale = 16 + XF_scale */ + + /* difference is 16 + XF_scale - 77, which is XF_scale - 61 */ + XF_scale = __riscv_vsub(XF_scale, 61, vlen); + n = __riscv_vadd(n, XF_scale, vlen); + /* 2^n P / Q is the answer if input is positive */ + /* For negative input, the answer is the reciprocal times pi/sin(pi x) */ + + VINT Numer = P; + VINT Denom = Q; + VINT vy_sign; + vy_sign = __riscv_vxor(vy_sign, vy_sign, vlen); + + if (__riscv_vcpop(x_lt_0, vlen) > 0) { + /* we first recipricate and change n to negative n */ + Numer = __riscv_vmerge(Numer, Q, x_lt_0, vlen); + Denom = __riscv_vmerge(Denom, P, x_lt_0, vlen); + + VINT P_SIN, SIN_scale, lsb; + TGAMMA_SIN(vx_orig, P_SIN, SIN_scale, lsb, vy_special, special_args, + vlen); + + vy_sign = __riscv_vmerge(vy_sign, lsb, x_lt_0, vlen); + + P_SIN = __riscv_vsmul(P_SIN, Denom, 1, vlen); + Denom = __riscv_vmerge(Denom, P_SIN, x_lt_0, vlen); + + SIN_scale = __riscv_vsub(SIN_scale, 63, vlen); + VINT n_prime = __riscv_vrsub(n, 0, vlen); + n_prime = __riscv_vadd(n_prime, SIN_scale, vlen); + n = __riscv_vmerge(n, n_prime, x_lt_0, vlen); + } + + VFLOAT numer_hi, numer_lo, denom_hi, denom_lo; + numer_hi = __riscv_vfcvt_f(Numer, vlen); + Numer_tail = __riscv_vsub(Numer, __riscv_vfcvt_x(numer_hi, vlen), vlen); + numer_lo = __riscv_vfcvt_f(Numer_tail, vlen); + + denom_hi = __riscv_vfcvt_f(Denom, vlen); + Denom_tail = __riscv_vsub(Denom, __riscv_vfcvt_x(denom_hi, vlen), vlen); + denom_lo = __riscv_vfcvt_f(Denom_tail, vlen); + + DIV_N2D2(numer_hi, numer_lo, denom_hi, denom_lo, vy, vlen); + FAST_LDEXP(vy, n, vlen); + + vy_sign = __riscv_vsll(vy_sign, 63, vlen); + vy = __riscv_vfsgnjx(vy, I_AS_F(vy_sign), vlen); + + vy = __riscv_vmerge(vy, vy_special, special_args, vlen); + + // copy vy into y and increment addr pointers + VFSTORE_OUTARG1(vy, vlen); + + INCREMENT_INARG1(vlen); + INCREMENT_OUTARG1(vlen); + } + RESTORE_FRM; +} diff --git a/src/rvvlm_tgammaD.c b/src/rvvlm_tgammaD.c new file mode 100644 index 0000000..48038de --- /dev/null +++ b/src/rvvlm_tgammaD.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE UNIT_STRIDE + +#include RVVLM_TGAMMAD_VSET_CONFIG + +#include "rvvlm_gammafuncsD.h" + +#include "rvvlm_tgammaD.inc.h" diff --git a/src/rvvlm_tgammaDI.c b/src/rvvlm_tgammaDI.c new file mode 100644 index 0000000..98655ab --- /dev/null +++ b/src/rvvlm_tgammaDI.c @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#define API_SIGNATURE API_SIGNATURE_11 +#define STRIDE GENERAL_STRIDE + +#include RVVLM_TGAMMADI_VSET_CONFIG + +#include "rvvlm_gammafuncsD.h" + +#include "rvvlm_tgammaD.inc.h" diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f6bed89..e47ab42 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -79,6 +79,8 @@ set(TEST_SOURCES src/test_sinhI.cpp src/test_tanh.cpp src/test_tanhI.cpp + src/test_tgamma.cpp + src/test_tgammaI.cpp ) add_executable(test_veclibm ${TEST_SOURCES}) diff --git a/test/include/test_infra.h b/test/include/test_infra.h index eb8609b..2bed42b 100644 --- a/test/include/test_infra.h +++ b/test/include/test_infra.h @@ -101,3 +101,8 @@ long double recip_scale(long double); long double erfl_prime(long double); long double erfcl_prime(long double); long double cdfnorml_prime(long double); +long double log_4_stirling(long double); +long double stirling_power(long double); +long double stirling_correction(long double); +long double tgammal_mod(long double); +long double sinpix_by_pi(long double); diff --git a/test/src/test_acos.cpp b/test/src/test_acos.cpp index f0ee50f..de12eef 100644 --- a/test/src/test_acos.cpp +++ b/test/src/test_acos.cpp @@ -19,21 +19,21 @@ TEST(acos, test) { x_start = -0x1.0p-40; x_end = 0x1.0p-40; ; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests); x_start = -0.5; x_end = 0.5; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests); x_start = 0.5; x_end = 1.0; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests); x_start = -1.0; x_end = -0.5; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests); } diff --git a/test/src/test_acospi.cpp b/test/src/test_acospi.cpp index cf851f9..794e770 100644 --- a/test/src/test_acospi.cpp +++ b/test/src/test_acospi.cpp @@ -18,21 +18,21 @@ TEST(acospi, test) { x_start = -0x1.0p-40; x_end = 0x1.0p-40; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests); x_start = -0.5; x_end = 0.5; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests); x_start = 0.5; x_end = 1.0; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests); x_start = -1.0; x_end = -0.5; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests); } diff --git a/test/src/test_asinh.cpp b/test/src/test_asinh.cpp index 51f20ff..baf5b00 100644 --- a/test/src/test_asinh.cpp +++ b/test/src/test_asinh.cpp @@ -18,41 +18,41 @@ TEST(asinh, test) { x_start = 0x1.0p-40; x_end = 0x1.0p-35; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); x_start = -0x1.0p-35; x_end = -0x1.0p-40; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); x_start = 0x1.0p-20; x_end = 0x1.0p-10; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); x_start = 0x1.0p-6; x_end = 0x1.0p0; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); x_start = 0x1.0p0; x_end = 0x1.0p2; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); x_start = -0x1.0p0; x_end = -0x1.0p2; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); x_start = 0x1.0p490; x_end = 0x1.0p520; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); x_start = 0x1.0p1020; x_end = 0x1.FFFFFFFFFFp1023; - nb_tests = 40000; + nb_tests = 20000; report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests); } diff --git a/test/src/test_atan2.cpp b/test/src/test_atan2.cpp index 6c890f8..972fd43 100644 --- a/test/src/test_atan2.cpp +++ b/test/src/test_atan2.cpp @@ -24,7 +24,7 @@ TEST(atan2, test) { nb_x = 8; y_start = 0x1.01p0; y_end = 0x1.fffp0; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -34,7 +34,7 @@ TEST(atan2, test) { nb_x = 8; y_start = 0x1.01p1020; y_end = 0x1.ffffffffp1020; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -44,7 +44,7 @@ TEST(atan2, test) { nb_x = 8; y_start = 0x1.01p-1020; y_end = 0x1.ffffffffp-1020; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -54,7 +54,7 @@ TEST(atan2, test) { nb_x = 8; y_start = 0x1.01p0; y_end = 0x1.fffp0; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -64,7 +64,7 @@ TEST(atan2, test) { nb_x = 8; y_start = 0x1.01p1020; y_end = 0x1.ffffffffp1020; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -74,7 +74,7 @@ TEST(atan2, test) { nb_x = 8; y_start = 0x1.01p-1020; y_end = 0x1.ffffffffp-1020; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -84,7 +84,7 @@ TEST(atan2, test) { nb_x = 8; y_start = -0x1.01p0; y_end = -0x1.fffp0; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -94,7 +94,7 @@ TEST(atan2, test) { nb_x = 8; y_start = -0x1.01p1020; y_end = -0x1.ffffffffp1020; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -104,7 +104,7 @@ TEST(atan2, test) { nb_x = 8; y_start = -0x1.01p-1020; y_end = -0x1.ffffffffp-1020; - nb_y = 200000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -114,7 +114,7 @@ TEST(atan2, test) { nb_x = 8; y_start = -0x1.01p0; y_end = -0x1.fffp0; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -124,7 +124,7 @@ TEST(atan2, test) { nb_x = 8; y_start = -0x1.01p1020; y_end = -0x1.ffffffffp1020; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); @@ -134,7 +134,7 @@ TEST(atan2, test) { nb_x = 8; y_start = -0x1.01p-1020; y_end = -0x1.ffffffffp-1020; - nb_y = 20000; + nb_y = 2000; report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); } diff --git a/test/src/test_atan2pi.cpp b/test/src/test_atan2pi.cpp index d3bbf65..85bf2f6 100644 --- a/test/src/test_atan2pi.cpp +++ b/test/src/test_atan2pi.cpp @@ -20,109 +20,109 @@ TEST(atan2pi, test) { x_start = 0x1.000000001p0; x_end = 0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = 0x1.01p0; y_end = 0x1.fffp0; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = 0x1.000000001p0; x_end = 0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = 0x1.01p1020; y_end = 0x1.ffffffffp1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = 0x1.000000001p0; x_end = 0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = 0x1.01p-1020; y_end = 0x1.ffffffffp-1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = -0x1.000000001p0; x_end = -0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = 0x1.01p0; y_end = 0x1.fffp0; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = -0x1.000000001p0; x_end = -0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = 0x1.01p1020; y_end = 0x1.ffffffffp1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = -0x1.000000001p0; x_end = -0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = 0x1.01p-1020; y_end = 0x1.ffffffffp-1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = 0x1.000000001p0; x_end = 0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = -0x1.01p0; y_end = -0x1.fffp0; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = 0x1.000000001p0; x_end = 0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = -0x1.01p1020; y_end = -0x1.ffffffffp1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = 0x1.000000001p0; x_end = 0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = -0x1.01p-1020; y_end = -0x1.ffffffffp-1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = -0x1.000000001p0; x_end = -0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = -0x1.01p0; y_end = -0x1.fffp0; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = -0x1.000000001p0; x_end = -0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = -0x1.01p1020; y_end = -0x1.ffffffffp1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); x_start = -0x1.000000001p0; x_end = -0x1.ffffffffffp0; - nb_x = 8; + nb_x = 4; y_start = -0x1.01p-1020; y_end = -0x1.ffffffffp-1020; - nb_y = 50000; + nb_y = 5000; report_err2_fp64(rvvlm_atan2pi, atan2pil, x_start, x_end, nb_x, y_start, y_end, nb_y, swap_xy); } diff --git a/test/src/test_atanh.cpp b/test/src/test_atanh.cpp index da48400..9f1f492 100644 --- a/test/src/test_atanh.cpp +++ b/test/src/test_atanh.cpp @@ -18,41 +18,41 @@ TEST(atanh, test) { x_start = 0x1.0p-40; x_end = 0x1.0p-35; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); x_start = -0x1.0p-35; x_end = -0x1.0p-40; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); x_start = 0x1.0p-20; x_end = 0x1.0p-10; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); x_start = 0x1.0p-10; x_end = 0x1.ffp-3; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); x_start = 0x1.00p-9; x_end = 0x1.0p-2; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); x_start = 0x1.0p-1; x_end = 0x1.0p0 - 0x1.0p-51; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); x_start = -0x1.0p-1; x_end = -(0x1.0p0 - 0x1.0p-51); - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); x_start = -1.0 + 0x1.0p-53; x_end = 1.0 - 0x1.0p-51; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_atanh, atanhl, x_start, x_end, nb_tests); } diff --git a/test/src/test_cdfnorm.cpp b/test/src/test_cdfnorm.cpp index e96c5f3..6197ef8 100644 --- a/test/src/test_cdfnorm.cpp +++ b/test/src/test_cdfnorm.cpp @@ -18,36 +18,36 @@ TEST(cdfnorm, test) { x_start = -0x1.0p0; x_end = 0x1.0p0; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_cdfnorm, cdfnorml, x_start, x_end, nb_tests); x_start = 0x1.0p-2; x_end = 0x1.0p0; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_cdfnorm, cdfnorml, x_start, x_end, nb_tests); report_err_fp64(rvvlm_cdfnorm, cdfnorml, -x_start, -x_end, nb_tests); x_start = 0x1.0p0; x_end = 0x1.0p3; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_cdfnorm, cdfnorml, x_start, x_end, nb_tests); report_err_fp64(rvvlm_cdfnorm, cdfnorml, -x_start, -x_end, nb_tests); x_start = 0x1.0p3; x_end = 30.0; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_cdfnorm, cdfnorml, x_start, x_end, nb_tests); report_err_fp64(rvvlm_cdfnorm, cdfnorml, -x_start, -x_end, nb_tests); x_start = 0x1.0p-80; x_end = 0x1.0p-50; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_cdfnorm, cdfnorml, x_start, x_end, nb_tests); report_err_fp64(rvvlm_cdfnorm, cdfnorml, -x_start, -x_end, nb_tests); x_start = 0x1.0p-50; x_end = 0x1.0p-10; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_cdfnorm, cdfnorml, x_start, x_end, nb_tests); report_err_fp64(rvvlm_cdfnorm, cdfnorml, -x_start, -x_end, nb_tests); } diff --git a/test/src/test_cdfnorminv.cpp b/test/src/test_cdfnorminv.cpp index 27b684f..4e90d75 100644 --- a/test/src/test_cdfnorminv.cpp +++ b/test/src/test_cdfnorminv.cpp @@ -26,19 +26,19 @@ TEST(cdfnorminv, tiny_args) { x_start = 0x1.0p-1074; x_end = 0x1.0p-1000; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-1000; x_end = 0x1.0p-500; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-55; x_end = 0x1.0p-53; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); } @@ -51,13 +51,13 @@ TEST(cdfnorminv, small_args) { x_start = 0x1.0p-50; x_end = 0x1.0p-20; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-20; x_end = 0x1.0p-4; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); } @@ -70,19 +70,19 @@ TEST(cdfnorminv, medium_args) { x_start = 0x1.0p-5; x_end = 0x1.0p-2; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-2; x_end = 0x1.8p-1; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); x_start = 1.0 - 0x1.0p-53; x_end = 0x1.8p-1; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_cdfnorminv, cdfnorml, cdfnorml_prime, x_start, x_end, nb_tests); } diff --git a/test/src/test_erf.cpp b/test/src/test_erf.cpp index 6f82462..cd05e2a 100644 --- a/test/src/test_erf.cpp +++ b/test/src/test_erf.cpp @@ -18,26 +18,26 @@ TEST(erf, test) { x_start = 0x1.0p-40; x_end = 0x1.0p-20; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_erf, erfl, x_start, x_end, nb_tests); x_start = 0x1.0p-20; x_end = 0x1.0p1; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_erf, erfl, x_start, x_end, nb_tests); x_start = 0x1.0p1; x_end = 0x1.0p2; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_erf, erfl, x_start, x_end, nb_tests); x_start = 0x1.0p2; x_end = 7.0; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_erf, erfl, x_start, x_end, nb_tests); x_start = -6.5; x_end = 6.5; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_erf, erfl, x_start, x_end, nb_tests); } diff --git a/test/src/test_erfcinv.cpp b/test/src/test_erfcinv.cpp index 5dfae21..7000476 100644 --- a/test/src/test_erfcinv.cpp +++ b/test/src/test_erfcinv.cpp @@ -25,19 +25,19 @@ TEST(erfcinv, tiny_args) { x_start = 0x1.0p-1074; x_end = 0x1.0p-1000; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-1000; x_end = 0x1.0p-500; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-55; x_end = 0x1.0p-53; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); } @@ -51,14 +51,14 @@ TEST(erfcinv, small_args) { x_start = 0x1.0p-50; ; x_end = 0x1.0p-20; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-20; ; x_end = 0x1.0p-4; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); } @@ -72,20 +72,20 @@ TEST(erfcinv, medium_args) { x_start = 0x1.0p-4; ; x_end = 0x1.0p-1; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-1; ; x_end = 0x1.8p0; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); x_start = 2.0 - 0x1.0p-52; x_end = 0x1.8p0; - nb_tests = 40000; + nb_tests = 10000; report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end, nb_tests); } diff --git a/test/src/test_erfinv.cpp b/test/src/test_erfinv.cpp index 9e7a436..81839b0 100644 --- a/test/src/test_erfinv.cpp +++ b/test/src/test_erfinv.cpp @@ -24,23 +24,20 @@ TEST(erfinv, small_args) { COMMENT("erfinv: current chosen algorithm; reduced argument in FP64 only") x_start = 0x1.0p-40; - ; x_end = 0x1.0p-30; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-30; - ; x_end = 0x1.0p-10; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-10; - ; x_end = 0x1.0p-4; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); } @@ -52,43 +49,38 @@ TEST(erfinv, middle_args) { COMMENT("erfinv: current chosen algorithm; reduced argument in FP64 only") x_start = 0x1.0p-4; - ; x_end = 0x1.0p-2; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-2; - ; x_end = 0x1.0p-1; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); x_start = 0x1.0p-1; - ; x_end = 0x1.78p-1; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); x_start = 0x1.6p-1; - ; x_end = 0x1.7cp-1; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); x_start = 0x1.71p-1; - ; x_end = 0x1.8p-1; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); x_start = -0.8; x_end = 0.8; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); @@ -107,7 +99,7 @@ TEST(erfinv, close_to_1) { x_start = 1.0 - 0x1.0p-53; x_end = 0x1.ffp-1; - nb_tests = 40000; + nb_tests = 20000; report_err_byinv_fp64(rvvlm_erfinv, erfl, erfl_prime, x_start, x_end, nb_tests); } diff --git a/test/src/test_exp10.cpp b/test/src/test_exp10.cpp index bf563ee..ffdc649 100644 --- a/test/src/test_exp10.cpp +++ b/test/src/test_exp10.cpp @@ -18,12 +18,12 @@ TEST(exp10, small_args) { x_start = -0.34; x_end = 0.34; - nb_tests = 300000; + nb_tests = 30000; report_err_fp64(rvvlm_exp10, exp10l, x_start, x_end, nb_tests); x_start = -3.0; x_end = 3.0; - nb_tests = 400000; + nb_tests = 40000; report_err_fp64(rvvlm_exp10, exp10l, x_start, x_end, nb_tests); } @@ -35,12 +35,12 @@ TEST(exp10, medium_args) { x_start = -15.0; x_end = -10.0; - nb_tests = 400000; + nb_tests = 40000; report_err_fp64(rvvlm_exp10, exp10l, x_start, x_end, nb_tests); x_start = 10.0; x_end = 15.0; - nb_tests = 400000; + nb_tests = 40000; report_err_fp64(rvvlm_exp10, exp10l, x_start, x_end, nb_tests); } @@ -52,11 +52,11 @@ TEST(exp10, large_args) { x_start = 295.0; x_end = 308.0; - nb_tests = 400000; + nb_tests = 40000; report_err_fp64(rvvlm_exp10, exp10l, x_start, x_end, nb_tests); x_start = -323.0; x_end = -300.0; - nb_tests = 400000; + nb_tests = 40000; report_err_fp64(rvvlm_exp10, exp10l, x_start, x_end, nb_tests); } diff --git a/test/src/test_exp2.cpp b/test/src/test_exp2.cpp index 25358d7..9ff2919 100644 --- a/test/src/test_exp2.cpp +++ b/test/src/test_exp2.cpp @@ -18,12 +18,12 @@ TEST(exp2, small_args) { x_start = -0.34; x_end = 0.34; - nb_tests = 300000; + nb_tests = 80000; report_err_fp64(rvvlm_exp2, exp2l, x_start, x_end, nb_tests); x_start = -3.0; x_end = 3.0; - nb_tests = 400000; + nb_tests = 100000; report_err_fp64(rvvlm_exp2, exp2l, x_start, x_end, nb_tests); } @@ -35,12 +35,12 @@ TEST(exp2, medium_args) { x_start = -14.0; x_end = -10.0; - nb_tests = 400000; + nb_tests = 100000; report_err_fp64(rvvlm_exp2, exp2l, x_start, x_end, nb_tests); x_start = 10.0; x_end = 15.0; - nb_tests = 400000; + nb_tests = 100000; report_err_fp64(rvvlm_exp2, exp2l, x_start, x_end, nb_tests); } diff --git a/test/src/test_expm1.cpp b/test/src/test_expm1.cpp index 0369d62..841c12d 100644 --- a/test/src/test_expm1.cpp +++ b/test/src/test_expm1.cpp @@ -18,12 +18,12 @@ TEST(expm1, small_args) { x_start = -0.01; x_end = 0.01; - nb_tests = 300000; + nb_tests = 30000; report_err_fp64(rvvlm_expm1, expm1l, x_start, x_end, nb_tests); x_start = -.3; x_end = 0.3; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_expm1, expm1l, x_start, x_end, nb_tests); } @@ -35,12 +35,12 @@ TEST(expm1, medium_args) { x_start = 1.0; x_end = 10.0; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_expm1, expm1l, x_start, x_end, nb_tests); x_start = -10.0; x_end = -1.0; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_expm1, expm1l, x_start, x_end, nb_tests); x_start = -40.0; @@ -50,7 +50,7 @@ TEST(expm1, medium_args) { x_start = 36.0; x_end = 40.0; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_expm1, expm1l, x_start, x_end, nb_tests); } @@ -62,11 +62,11 @@ TEST(expm1, large_args) { x_start = 700.0; x_end = 709.0; - nb_tests = 400000; + nb_tests = 40000; report_err_fp64(rvvlm_expm1, expm1l, x_start, x_end, nb_tests); x_start = -50.0; x_end = -40.0; - nb_tests = 400000; + nb_tests = 40000; report_err_fp64(rvvlm_expm1, expm1l, x_start, x_end, nb_tests); } diff --git a/test/src/test_infra.cpp b/test/src/test_infra.cpp index ec2acd4..bb0eda0 100644 --- a/test/src/test_infra.cpp +++ b/test/src/test_infra.cpp @@ -1313,3 +1313,40 @@ long double cdfnorml_prime(long double x) { y = one_ov_rt_2pi * expl(-x * x * half); return y; } + +long double log_4_stirling(long double x) { + long double e = 2.7182818284590452353602874713526615L; + long double y = x / e; + y = logl(y); + return y; +} + +long double stirling_power(long double x) { + long double e = 2.7182818284590452353602874713526615L; + long double half = 0x1.0p-1L; + long double x_minus_half = x - half; + long double y = x / e; + y = powl(y, x_minus_half); + return y; +} + +long double stirling_correction(long double x) { + long double stirling = stirling_power(x); + long double y = tgammal(x) / stirling; + return y; +} + +long double tgammal_mod(long double x) { + // tgammal(x) for x >= 0; else tgamma(1-x) + long double one_minus_x = 1.0L - x; + long double y; + y = (x >= 0.0) ? tgammal(x) : tgammal(one_minus_x); + return y; +} + +long double sinpix_by_pi(long double x) { + long double pi = 0x1.921f'b5444'2d18'4698'98cc'5170'1b83'9p1L; + long double y = pi * x; + y = -sinl(y) / pi; + return y; +} diff --git a/test/src/test_log.cpp b/test/src/test_log.cpp index 965b37e..9ec687b 100644 --- a/test/src/test_log.cpp +++ b/test/src/test_log.cpp @@ -25,17 +25,17 @@ TEST(log, around_1) { x_start = 0.7; x_end = 1.5; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_log, logl, x_start, x_end, nb_tests); x_start = 1.0; x_end = 4.0; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_log, logl, x_start, x_end, nb_tests); x_start = 0.25; x_end = 1.0; - nb_tests = 1000000; + nb_tests = 100000; report_err_fp64(rvvlm_log, logl, x_start, x_end, nb_tests); } @@ -47,11 +47,11 @@ TEST(log, extreme_args) { x_start = 0x1.0p1023; x_end = 0x1.ffffffp1023; - nb_tests = 400000; + nb_tests = 100000; report_err_fp64(rvvlm_log, logl, x_start, x_end, nb_tests); x_start = 0x1.0p-1074; x_end = 0x1.0p-1022; - nb_tests = 400000; + nb_tests = 100000; report_err_fp64(rvvlm_log, logl, x_start, x_end, nb_tests); } diff --git a/test/src/test_log2.cpp b/test/src/test_log2.cpp index 3508c70..7c39aa7 100644 --- a/test/src/test_log2.cpp +++ b/test/src/test_log2.cpp @@ -25,17 +25,17 @@ TEST(log2, around_1) { x_start = 0.7; x_end = 1.5; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_log2, log2l, x_start, x_end, nb_tests); x_start = 1.0; x_end = 4.0; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_log2, log2l, x_start, x_end, nb_tests); x_start = 0.25; x_end = 1.0; - nb_tests = 40000; + nb_tests = 10000; report_err_fp64(rvvlm_log2, log2l, x_start, x_end, nb_tests); } @@ -47,11 +47,11 @@ TEST(log2, extreme_args) { x_start = 0x1.0p1023; x_end = 0x1.ffffffp1023; - nb_tests = 400000; + nb_tests = 10000; report_err_fp64(rvvlm_log2, log2l, x_start, x_end, nb_tests); x_start = 0x1.0p-1074; x_end = 0x1.0p-1022; - nb_tests = 400000; + nb_tests = 10000; report_err_fp64(rvvlm_log2, log2l, x_start, x_end, nb_tests); } diff --git a/test/src/test_pow.cpp b/test/src/test_pow.cpp index 9411f0f..f8c4b2f 100644 --- a/test/src/test_pow.cpp +++ b/test/src/test_pow.cpp @@ -33,15 +33,15 @@ TEST(pow, medium_args) { x_start = 0x1.0p-3; x_end = 0x1.0p4; - nb_pts_x = 400; + nb_pts_x = 200; y_start = 0x1.0p-3; y_end = 0x1.0p4; - nb_pts_y = 400; + nb_pts_y = 200; report_err2_fp64(rvvlm_pow, powl, x_start, x_end, nb_pts_x, y_start, y_end, nb_pts_y, swap_xy); - nb_targets = 10; - nb_tests = 10000; + nb_targets = 5; + nb_tests = 1000; target_start = -32.0 * log(2.0); target_end = 32.0 * log(2.0); @@ -69,8 +69,8 @@ TEST(pow, large_args) { COMMENT("pow: current chosen algorithm; reduced argument in FP64 only") - nb_targets = 10; - nb_tests = 100000; + nb_targets = 5; + nb_tests = 1000; x_start = 0x1.0p-10; x_end = 0x1.0p10; diff --git a/test/src/test_tan.cpp b/test/src/test_tan.cpp index 317d3b6..e029114 100644 --- a/test/src/test_tan.cpp +++ b/test/src/test_tan.cpp @@ -25,12 +25,12 @@ TEST(tan, small_args) { x_start = -0.7; x_end = 0.7; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_tan, tanl, x_start, x_end, nb_tests); x_start = -6.0; x_end = 6.0; - nb_tests = 100000; + nb_tests = 10000; report_err_fp64(rvvlm_tan, tanl, x_start, x_end, nb_tests); } @@ -42,16 +42,16 @@ TEST(tan, large_args) { x_start = 1.0; x_end = 0x1.0p23; - nb_tests = 100000; + nb_tests = 30000; report_err_fp64(rvvlm_tan, tanl, x_start, x_end, nb_tests); x_start = 0x1.0p25; x_end = 0x1.0p100; - nb_tests = 100000; + nb_tests = 30000; report_err_fp64(rvvlm_tan, tanl, x_start, x_end, nb_tests); x_start = 0x1.ffffp1023; x_end = 0x1.0p1000; - nb_tests = 100000; + nb_tests = 30000; report_err_fp64(rvvlm_tan, tanl, x_start, x_end, nb_tests); } diff --git a/test/src/test_tgamma.cpp b/test/src/test_tgamma.cpp new file mode 100644 index 0000000..2171cfa --- /dev/null +++ b/test/src/test_tgamma.cpp @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#include "test_infra.h" + +TEST(tgamma, special) { + unsigned long nb_tests; + double x_start, x_end; + + COMMENT("tgamma: current chosen algorithm; reduced argument in FP64 only") + + show_special_fp64(rvvlm_tgamma, "Special Value handling of this function"); +} + +TEST(tgamma, gamma_tiny) { + unsigned long nb_tests; + double x_start, x_end; + + x_start = 0x1.0p-120; + x_end = 0x1.0p-118; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); + + x_start = 0x1.0p-50; + x_end = 0x1.0p-40; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); + + x_start = -0x1.0p-118; + x_end = -0x1.0p-120; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); + + x_start = -0x1.0p-40; + x_end = -0x1.0p-50; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); +} + +TEST(tgamma, gamma_moderate) { + unsigned long nb_tests; + double x_start, x_end; + + x_start = 0.5; + x_end = 4.0; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); + + x_start = 4.0; + x_end = 10.0; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); + + x_start = -0.9; + x_end = -0.2; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); + + x_start = -10.1; + x_end = -0.9; + nb_tests = 1231; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); +} + +TEST(tgamma, gamma_large) { + unsigned long nb_tests; + double x_start, x_end; + + x_start = 100.0; + x_end = 171.0; + nb_tests = 1000; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); + + x_start = -171.1; + x_end = -100.3; + nb_tests = 1317; + report_err_fp64(rvvlm_tgamma, tgammal, x_start, x_end, nb_tests); +} diff --git a/test/src/test_tgammaI.cpp b/test/src/test_tgammaI.cpp new file mode 100644 index 0000000..ec67b20 --- /dev/null +++ b/test/src/test_tgammaI.cpp @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: 2023 Rivos Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "rvvlm.h" +#include "test_infra.h" + +TEST(tgammaI, test) { + unsigned long nb_tests; + double x_start, x_end; + + COMMENT("tgammaI: current chosen algorithm; reduced argument in FP64 only") + + x_start = 0.5; + x_end = 1.75; + nb_tests = 30; + int stride_x = 21; + int stride_y = 30; + report_err_fp64(rvvlm_tgammaI, tgammal, x_start, x_end, nb_tests, stride_x, + stride_y); +}