Skip to content

Commit

Permalink
Merge pull request #29 from rivosinc/dev/PingTakPeterTang/gamma
Browse files Browse the repository at this point in the history
add the FP64 true gamma function tgamma
  • Loading branch information
PingTakPeterTang authored Mar 29, 2024
2 parents 194eb48 + ca948ac commit bddd67f
Show file tree
Hide file tree
Showing 29 changed files with 816 additions and 143 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ set(PROJECT_SOURCES
src/rvvlm_sinhDI.c
src/rvvlm_tanhD.c
src/rvvlm_tanhDI.c
src/rvvlm_tgammaD.c
src/rvvlm_tgammaDI.c
)

add_library(vecm
Expand Down
23 changes: 23 additions & 0 deletions include/rvvlm.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,17 @@ union sui64_fp64 {
(delta_Q) = __riscv_vfmul(_q, __riscv_vfrec7((denom), (vlen)), (vlen)); \
} while (0)

// Compensated division: produces a quotient pair Q + delta_Q approximating
// (numer + delta_n) / (denom + delta_d).
//
//   recip   = 1.0 / denom                      (vfrdiv)
//   Q       = numer * recip
//   resid   = numer - Q * denom                (vfnmsub)
//   resid   = resid - Q * delta_d              (vfnmsac)
//   resid   = resid + delta_n
//   delta_Q = resid * recip
//
// numer, delta_n, denom, delta_d are inputs; Q and delta_Q are assigned by
// the macro; vlen is passed to every intrinsic as the vector length.
//
// numer, denom and Q are expanded more than once, so pass side-effect-free
// expressions. The macro declares block-local temporaries named _recip and
// _q (type VFLOAT); arguments must not themselves be named _recip or _q.
#define ACC_DIV2_N2D2(numer, delta_n, denom, delta_d, Q, delta_Q, vlen) \
do { \
VFLOAT _recip, _q; \
_recip = __riscv_vfrdiv((denom), 0x1.0p0, (vlen)); \
(Q) = __riscv_vfmul((numer), _recip, (vlen)); \
_q = __riscv_vfnmsub((Q), (denom), (numer), (vlen)); \
_q = __riscv_vfnmsac(_q, (Q), (delta_d), (vlen)); \
_q = __riscv_vfadd(_q, (delta_n), (vlen)); \
(delta_Q) = __riscv_vfmul(_q, _recip, (vlen)); \
} while (0)

#define SQRT2_X2(x, delta_x, r, delta_r, vlen) \
do { \
VFLOAT xx = __riscv_vfadd((x), (delta_x), (vlen)); \
Expand Down Expand Up @@ -469,6 +480,13 @@ union sui64_fp64 {
#define RVVLM_TANPIDI_VSET_CONFIG "rvvlm_fp64m2.h"
#define RVVLM_TANPIDI_MERGED rvvlm_tanpiI

// FP64 tgamma function configuration
#define RVVLM_TGAMMAD_VSET_CONFIG "rvvlm_fp64m1.h"
#define RVVLM_TGAMMAD_STD rvvlm_tgamma

#define RVVLM_TGAMMADI_VSET_CONFIG "rvvlm_fp64m1.h"
#define RVVLM_TGAMMADI_STD rvvlm_tgammaI

// FP64 cosh function configuration
#define RVVLM_COSHD_VSET_CONFIG "rvvlm_fp64m2.h"
#define RVVLM_COSHD_STD rvvlm_coshD_std
Expand Down Expand Up @@ -499,6 +517,7 @@ extern int64_t expD_tbl64_fixedpt[64];
extern int64_t logD_tbl128_fixedpt[128];
extern double logtbl_4_powD_128_hi_lo[256];
extern double dbl_2ovpi_tbl[28];
extern int64_t factorial_fixedpt[180];

// Define the functions in the vector math library
void RVVLM_ACOSD_FIXEDPT(size_t x_len, const double *x, double *y);
Expand Down Expand Up @@ -703,6 +722,10 @@ void RVVLM_TANHD_STD(size_t x_len, const double *x, double *y);
void RVVLM_TANHDI_STD(size_t x_len, const double *x, size_t stride_x, double *y,
size_t stride_y);

void RVVLM_TGAMMAD_STD(size_t x_len, const double *x, double *y);
void RVVLM_TGAMMADI_STD(size_t x_len, const double *x, size_t stride_x,
double *y, size_t stride_y);

#ifdef __cplusplus
}
#endif
27 changes: 27 additions & 0 deletions include/rvvlm_gammafuncsD.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// SPDX-FileCopyrightText: 2023 Rivos Inc.
//
// SPDX-License-Identifier: Apache-2.0

// Special-argument handling for tgamma.
//
// A lane is flagged as special when the biased exponent field of vx is below
// EXP_BIAS - 60 (this includes +-0) or equals 0x7FF (Inf or NaN). Results
// written for those lanes:
// gamma(+inf) = +inf; gamma(-inf/sNaN) is qNaN with invalid
// gamma(qNaN) is qNaN
// gamma(+-0) is +-inf and divide by 0
// gamma(tiny) is 1/tiny
//
// Arguments:
//   vx           in/out: input vector. When any special lane exists, every
//                special lane is overwritten with fp_posOne before returning
//                (presumably so the main computation path sees a harmless
//                value in those lanes -- confirm against the caller).
//   special_args out: mask of the special lanes (always assigned).
//   vy_special   in/out: special lanes receive the results listed above;
//                all other lanes keep their incoming value.
//   vlen         vector length passed to every intrinsic.
//
// Implementation notes:
//   - The vfclass/merge work is skipped entirely when no lane is special.
//   - -inf lanes are first replaced by fp_sNaN so that the masked x + x
//     below yields the NaN result for them; +inf + +inf stays +inf.
//   - Tiny lanes are computed as fp_posOne / x via a masked vfrdiv.
//   - The macro declares block-local names expo_x, x_small, x_InfNaN, vclass,
//     x_negInf and y_tmp; arguments must not use these names.
//   - MAN_LEN, EXP_BIAS, F_AS_U, IDENTIFY, class_negInf, fp_sNaN and
//     fp_posOne are expected to be defined elsewhere (not visible here).
#define EXCEPTION_HANDLING_TGAMMA(vx, special_args, vy_special, vlen) \
do { \
VUINT expo_x = __riscv_vand(__riscv_vsrl(F_AS_U((vx)), MAN_LEN, (vlen)), \
0x7FF, (vlen)); \
VBOOL x_small = __riscv_vmsltu(expo_x, EXP_BIAS - 60, (vlen)); \
VBOOL x_InfNaN = __riscv_vmseq(expo_x, 0x7FF, (vlen)); \
(special_args) = __riscv_vmor(x_small, x_InfNaN, (vlen)); \
if (__riscv_vcpop((special_args), (vlen)) > 0) { \
VUINT vclass = __riscv_vfclass((vx), (vlen)); \
VBOOL x_negInf; \
IDENTIFY(vclass, class_negInf, x_negInf, (vlen)); \
(vx) = __riscv_vfmerge((vx), fp_sNaN, x_negInf, (vlen)); \
VFLOAT y_tmp = __riscv_vfadd(x_InfNaN, (vx), (vx), (vlen)); \
(vy_special) = __riscv_vmerge((vy_special), y_tmp, x_InfNaN, (vlen)); \
y_tmp = __riscv_vfrdiv(x_small, (vx), fp_posOne, (vlen)); \
(vy_special) = __riscv_vmerge((vy_special), y_tmp, x_small, (vlen)); \
(vx) = __riscv_vfmerge((vx), fp_posOne, (special_args), (vlen)); \
} \
} while (0)
446 changes: 446 additions & 0 deletions include/rvvlm_tgammaD.inc.h

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions src/rvvlm_tgammaD.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: 2023 Rivos Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <riscv_vector.h>
#include <stdio.h>

#include "rvvlm.h"
// Configuration macros for this translation unit. They are set before the
// headers below are included.
// NOTE(review): presumably the included headers read API_SIGNATURE and STRIDE
// to select the function signature and the unit-stride access pattern --
// their contents are not visible here; confirm.
#define API_SIGNATURE API_SIGNATURE_11
#define STRIDE UNIT_STRIDE

// Header whose name is given by the RVVLM_TGAMMAD_VSET_CONFIG macro.
#include RVVLM_TGAMMAD_VSET_CONFIG

// Provides the EXCEPTION_HANDLING_TGAMMA macro.
#include "rvvlm_gammafuncsD.h"

// NOTE(review): expected to contain the tgamma implementation itself; it must
// come last so that the definitions above are in effect -- confirm.
#include "rvvlm_tgammaD.inc.h"
16 changes: 16 additions & 0 deletions src/rvvlm_tgammaDI.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: 2023 Rivos Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <riscv_vector.h>
#include <stdio.h>

#include "rvvlm.h"
// Configuration macros for this translation unit. They are set before the
// headers below are included.
// NOTE(review): presumably the included headers read API_SIGNATURE and STRIDE
// to select the function signature and the general (strided) access pattern
// -- their contents are not visible here; confirm.
#define API_SIGNATURE API_SIGNATURE_11
#define STRIDE GENERAL_STRIDE

// Header whose name is given by the RVVLM_TGAMMADI_VSET_CONFIG macro.
#include RVVLM_TGAMMADI_VSET_CONFIG

// Provides the EXCEPTION_HANDLING_TGAMMA macro.
#include "rvvlm_gammafuncsD.h"

// NOTE(review): expected to contain the tgamma implementation itself; it must
// come last so that the definitions above are in effect -- confirm.
#include "rvvlm_tgammaD.inc.h"
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ set(TEST_SOURCES
src/test_sinhI.cpp
src/test_tanh.cpp
src/test_tanhI.cpp
src/test_tgamma.cpp
src/test_tgammaI.cpp
)

add_executable(test_veclibm ${TEST_SOURCES})
Expand Down
5 changes: 5 additions & 0 deletions test/include/test_infra.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,8 @@ long double recip_scale(long double);
long double erfl_prime(long double);
long double erfcl_prime(long double);
long double cdfnorml_prime(long double);
long double log_4_stirling(long double);
long double stirling_power(long double);
long double stirling_correction(long double);
long double tgammal_mod(long double);
long double sinpix_by_pi(long double);
8 changes: 4 additions & 4 deletions test/src/test_acos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,21 @@ TEST(acos, test) {
x_start = -0x1.0p-40;
x_end = 0x1.0p-40;
;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);

x_start = -0.5;
x_end = 0.5;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);

x_start = 0.5;
x_end = 1.0;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);

x_start = -1.0;
x_end = -0.5;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);
}
8 changes: 4 additions & 4 deletions test/src/test_acospi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ TEST(acospi, test) {

x_start = -0x1.0p-40;
x_end = 0x1.0p-40;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);

x_start = -0.5;
x_end = 0.5;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);

x_start = 0.5;
x_end = 1.0;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);

x_start = -1.0;
x_end = -0.5;
nb_tests = 100000;
nb_tests = 10000;
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);
}
16 changes: 8 additions & 8 deletions test/src/test_asinh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,41 +18,41 @@ TEST(asinh, test) {

x_start = 0x1.0p-40;
x_end = 0x1.0p-35;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);

x_start = -0x1.0p-35;
x_end = -0x1.0p-40;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);

x_start = 0x1.0p-20;
x_end = 0x1.0p-10;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);

x_start = 0x1.0p-6;
x_end = 0x1.0p0;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);

x_start = 0x1.0p0;
x_end = 0x1.0p2;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);

x_start = -0x1.0p0;
x_end = -0x1.0p2;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);

x_start = 0x1.0p490;
x_end = 0x1.0p520;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);

x_start = 0x1.0p1020;
x_end = 0x1.FFFFFFFFFFp1023;
nb_tests = 40000;
nb_tests = 20000;
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
}
24 changes: 12 additions & 12 deletions test/src/test_atan2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = 0x1.01p0;
y_end = 0x1.fffp0;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -34,7 +34,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = 0x1.01p1020;
y_end = 0x1.ffffffffp1020;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -44,7 +44,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = 0x1.01p-1020;
y_end = 0x1.ffffffffp-1020;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -54,7 +54,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = 0x1.01p0;
y_end = 0x1.fffp0;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -64,7 +64,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = 0x1.01p1020;
y_end = 0x1.ffffffffp1020;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -74,7 +74,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = 0x1.01p-1020;
y_end = 0x1.ffffffffp-1020;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -84,7 +84,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = -0x1.01p0;
y_end = -0x1.fffp0;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -94,7 +94,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = -0x1.01p1020;
y_end = -0x1.ffffffffp1020;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -104,7 +104,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = -0x1.01p-1020;
y_end = -0x1.ffffffffp-1020;
nb_y = 200000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -114,7 +114,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = -0x1.01p0;
y_end = -0x1.fffp0;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -124,7 +124,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = -0x1.01p1020;
y_end = -0x1.ffffffffp1020;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);

Expand All @@ -134,7 +134,7 @@ TEST(atan2, test) {
nb_x = 8;
y_start = -0x1.01p-1020;
y_end = -0x1.ffffffffp-1020;
nb_y = 20000;
nb_y = 2000;
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
nb_y, swap_xy);
}
Loading

0 comments on commit bddd67f

Please sign in to comment.