Skip to content

Commit

Permalink
improved accuracy of erfcinv for tiny arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
PingTakPeterTang committed Mar 11, 2024
1 parent 98cbd67 commit 7afc81e
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 13 deletions.
10 changes: 10 additions & 0 deletions include/rvvlm.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ union sui64_fp64 {
} \
} while (0)

#define VSRL_I_AS_U(x, nbits, vlen) \
U_AS_I(__riscv_vsrl(I_AS_U((x)), (nbits), (vlen)))

#define PSTEP(coeff_j, x, poly, vlen) \
__riscv_vfmadd((poly), (x), VFMV_VF((coeff_j), (vlen)), (vlen))

Expand All @@ -93,6 +96,13 @@ union sui64_fp64 {
__riscv_vsra(__riscv_vsmul((POLY), (X), 1, (vlen)), (K), (vlen)), \
(COEFF_j), (vlen))

#define PSTEP_I_HI_SRA(COEFF_j, X, K, POLY, vlen) \
__riscv_vadd(__riscv_vsra(__riscv_vmulh((POLY), (X), (vlen)), (K), (vlen)), \
(COEFF_j), (vlen))

#define PSTEP_I_HI(COEFF_j, X, POLY, vlen) \
__riscv_vadd(__riscv_vmulh((POLY), (X), (vlen)), (COEFF_j), (vlen))

#define PSTEPN_I(COEFF_j, X, POLY, vlen) \
__riscv_vrsub(__riscv_vsmul((POLY), (X), 1, (vlen)), (COEFF_j), (vlen))

Expand Down
7 changes: 4 additions & 3 deletions include/rvvlm_erfcinvD.inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ void F_VER1(API) {
0x1.0p-63, vlen);
if (__riscv_vcpop(x_is_tiny, vlen) > 0) {
VFLOAT w_hi_dummy, w_lo_dummy;
SQRTX_4_TRANSFORM(y_hi, y_lo, w_hi_dummy, w_lo_dummy, T_tiny, 0x1.0p65,
0x1.0p-65, vlen);
SQRTX_4_TRANSFORM(y_hi, y_lo, w_hi_dummy, w_lo_dummy, T_tiny, 0x1.0p64,
0x1.0p-64, vlen);
}
}
w_hi = VFMV_VF(fp_posOne, vlen);
Expand Down Expand Up @@ -195,7 +195,8 @@ void F_VER1(API) {

if (__riscv_vcpop(x_is_tiny, vlen) > 0) {
VFLOAT p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny;
ERFCINV_PQ_TINY(T_tiny, p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny, vlen);
ERFCINV_PQ_HILO_TINY(T_tiny, p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny,
vlen);
p_hi = __riscv_vmerge(p_hi, p_hi_tiny, x_is_tiny, vlen);
p_lo = __riscv_vmerge(p_lo, p_lo_tiny, x_is_tiny, vlen);
q_hi = __riscv_vmerge(q_hi, q_hi_tiny, x_is_tiny, vlen);
Expand Down
116 changes: 116 additions & 0 deletions include/rvvlm_inverrorfuncsD.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,52 @@
#define Q_tiny_9 0x5e4a26a7c1415755 // sacle 57
#define DELTA_Q0_tiny 0x1.8a7adad44d65ap-4 // scale 66

#define Q50_84
// Using [P,Q]_tiny_[HI,LO]_k, HI in Q50, LO in Q84
#if defined(Q50_84)
#define P_tiny_HI_0 -0x8593442eL
#define P_tiny_LO_0 -0x4e7245b3L
#define P_tiny_HI_1 -0x7f3dc156b1L
#define P_tiny_LO_1 -0x1f0300096L
#define P_tiny_HI_2 -0x20dbbc67b1b8L
#define P_tiny_LO_2 -0xbc59b742L
#define P_tiny_HI_3 -0x30c75b8264d44L
#define P_tiny_LO_3 -0x18a421ab9L
#define P_tiny_HI_4 -0x1ab06f39e5addeL
#define P_tiny_LO_4 -0x180f2a477L
#define P_tiny_HI_5 -0x3d698fc2964a9cL
#define P_tiny_LO_5 0xc3d4ab0bL
#define P_tiny_HI_6 0x7f7d6a748d0c2fL
#define P_tiny_LO_6 0x1729754e9L
#define P_tiny_HI_7 0x18b100818c77848L
#define P_tiny_LO_7 0x1aca73439L
#define P_tiny_HI_8 0x4be1ed5d031774L
#define P_tiny_LO_8 -0x3b6c5afbL
#define P_tiny_HI_9 0x156a7e2e54e260L
#define P_tiny_LO_9 0x1a0c336beL

#define Q_tiny_HI_0 -0x85933cdaL
#define Q_tiny_LO_0 -0xb5b39d61L
#define Q_tiny_HI_1 -0x7f3de4b69fL
#define Q_tiny_LO_1 -0x151d1cd35L
#define Q_tiny_HI_2 -0x20dd8dc1da27L
#define Q_tiny_LO_2 -0x1706945d7L
#define Q_tiny_HI_3 -0x30dc92d1cd231L
#define Q_tiny_LO_3 0xabde03f9L
#define Q_tiny_HI_4 -0x1af5fcee397d58L
#define Q_tiny_LO_4 -0xc3530d28L
#define Q_tiny_HI_5 -0x42639eeec1d051L
#define Q_tiny_LO_5 0x662b41ecL
#define Q_tiny_HI_6 0x6182b99f6ca998L
#define Q_tiny_LO_6 0x938a5e35L
#define Q_tiny_HI_7 0x17a6848dc07624aL
#define Q_tiny_LO_7 0x8a0484b7L
#define Q_tiny_HI_8 0x105ecd6aac52b12L
#define Q_tiny_LO_8 0x1d1e38258L
#define Q_tiny_HI_9 0xbc944d4f8282afL
#define Q_tiny_LO_9 -0x155b50b48L
#endif

// erfinv(+-1) = +-Inf with divide by zero
// erfinv(x) |x| > 1, real is NaN with invalid
// erfinv(NaN) is NaN, invalid if input is signalling NaN
Expand Down Expand Up @@ -211,3 +257,73 @@
__riscv_vsub(Q, __riscv_vfcvt_x(q_hi_tiny, (vlen)), (vlen)), (vlen)); \
q_lo_tiny = __riscv_vfadd(q_lo, DELTA_Q0_tiny, (vlen)); \
} while (0)

#define UPDATE_P_LO(COEFF, T, P_HI, P_LO, P_tmp, K, vlen) \
do { \
(P_LO) = PSTEP_I_HI((COEFF), (T), (P_LO), (vlen)); \
(P_tmp) = __riscv_vmul((T), (P_HI), (vlen)); \
(P_tmp) = VSRL_I_AS_U((P_tmp), (K), (vlen)); \
(P_LO) = __riscv_vadd((P_LO), (P_tmp), (vlen)); \
} while (0)

#define ERFCINV_PQ_HILO_TINY(T, p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny, \
vlen) \
do { \
/* T is in scale of 64 */ \
VINT P_HI, P_LO, Q_HI, Q_LO, P_tmp, Q_tmp; \
\
P_HI = VMVI_VX(P_tiny_HI_9, (vlen)); \
P_LO = VMVI_VX(P_tiny_LO_9, (vlen)); \
\
UPDATE_P_LO(P_tiny_LO_8, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_8, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_7, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_7, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_6, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_6, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_5, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_5, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_4, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_4, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_3, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_3, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_2, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_2, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_1, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_1, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_0, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_0, (T), P_HI, (vlen)); \
\
Q_HI = VMVI_VX(Q_tiny_HI_9, (vlen)); \
Q_LO = VMVI_VX(Q_tiny_LO_9, (vlen)); \
\
UPDATE_P_LO(Q_tiny_LO_8, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_8, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_7, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_7, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_6, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_6, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_5, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_5, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_4, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_4, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_3, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_3, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_2, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_2, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_1, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_1, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_0, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_0, (T), Q_HI, (vlen)); \
\
VFLOAT A = __riscv_vfcvt_f(P_HI, (vlen)); \
p_lo_tiny = __riscv_vfcvt_f(P_LO, (vlen)); \
p_hi_tiny = __riscv_vfmadd(p_lo_tiny, 0x1.0p-34, A, (vlen)); \
p_lo_tiny = __riscv_vfmadd(p_lo_tiny, 0x1.0p-34, \
__riscv_vfsub(A, p_hi_tiny, (vlen)), (vlen)); \
VFLOAT B = __riscv_vfcvt_f(Q_HI, (vlen)); \
q_lo_tiny = __riscv_vfcvt_f(Q_LO, (vlen)); \
q_hi_tiny = __riscv_vfmadd(q_lo_tiny, 0x1.0p-34, B, (vlen)); \
q_lo_tiny = __riscv_vfmadd(q_lo_tiny, 0x1.0p-34, \
__riscv_vfsub(B, q_hi_tiny, (vlen)), (vlen)); \
} while (0)
15 changes: 5 additions & 10 deletions test/src/test_erfcinv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,22 @@ TEST(erfcinv, tiny_args) {
COMMENT("erfcinv: current chosen algorithm; reduced argument in FP64 only")

x_start = 0x1.0p-1074;
;
x_end = 0x1.0p-1000;
nb_tests = 40000;
double thres = 3.5;
report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end,
nb_tests, thres);
nb_tests);

x_start = 0x1.0p-1000;
;
x_end = 0x1.0p-500;
nb_tests = 40000;
thres = 2.5;
report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end,
nb_tests, thres);
nb_tests);

x_start = 0x1.0p-500;
;
x_end = 0x1.0p-52;
x_start = 0x1.0p-55;
x_end = 0x1.0p-53;
nb_tests = 40000;
report_err_byinv_fp64(rvvlm_erfcinv, erfcl, erfcl_prime, x_start, x_end,
nb_tests, thres);
nb_tests);
}

TEST(erfcinv, small_args) {
Expand Down

0 comments on commit 7afc81e

Please sign in to comment.