Skip to content

Commit

Permalink
Merge branch 'main' into dev/PingTakPeterTang/fix_formats
Browse files Browse the repository at this point in the history
  • Loading branch information
PingTakPeterTang authored Mar 12, 2024
2 parents ab3c498 + c94ec9d commit 31f5873
Show file tree
Hide file tree
Showing 40 changed files with 774 additions and 375 deletions.
10 changes: 10 additions & 0 deletions include/rvvlm.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ union sui64_fp64 {
} \
} while (0)

#define VSRL_I_AS_U(x, nbits, vlen) \
U_AS_I(__riscv_vsrl(I_AS_U((x)), (nbits), (vlen)))

#define PSTEP(coeff_j, x, poly, vlen) \
__riscv_vfmadd((poly), (x), VFMV_VF((coeff_j), (vlen)), (vlen))

Expand All @@ -93,6 +96,13 @@ union sui64_fp64 {
__riscv_vsra(__riscv_vsmul((POLY), (X), 1, (vlen)), (K), (vlen)), \
(COEFF_j), (vlen))

#define PSTEP_I_HI_SRA(COEFF_j, X, K, POLY, vlen) \
__riscv_vadd(__riscv_vsra(__riscv_vmulh((POLY), (X), (vlen)), (K), (vlen)), \
(COEFF_j), (vlen))

#define PSTEP_I_HI(COEFF_j, X, POLY, vlen) \
__riscv_vadd(__riscv_vmulh((POLY), (X), (vlen)), (COEFF_j), (vlen))

#define PSTEPN_I(COEFF_j, X, POLY, vlen) \
__riscv_vrsub(__riscv_vsmul((POLY), (X), 1, (vlen)), (COEFF_j), (vlen))

Expand Down
16 changes: 5 additions & 11 deletions include/rvvlm_erfcinvD.inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ void F_VER1(API) {
VFLOAT vx, vx_sign, vy, vy_special;
VBOOL special_args;

VFLOAT dummy;

SET_ROUNDTONEAREST;
// stripmining over input arguments
for (; _inarg_n > 0; _inarg_n -= vlen) {
Expand All @@ -85,8 +83,6 @@ void F_VER1(API) {
// Handle Inf and NaN
EXCEPTION_HANDLING_ERFCINV(vx, special_args, vy_special, vlen);

dummy = VFMV_VF(3.1415926535, vlen);

vx_sign = __riscv_vfrsub(vx, fp_posOne, vlen);
VFLOAT two_minus_x = __riscv_vfadd(vx_sign, fp_posOne, vlen);
VBOOL x_gt_1 = __riscv_vmflt(vx_sign, fp_posZero, vlen);
Expand All @@ -97,6 +93,7 @@ void F_VER1(API) {
VFLOAT w_hi, w_lo, w_hi_left, w_lo_left, y_hi, y_lo;
VINT T, T_left, T_tiny;
VBOOL x_is_tiny;
x_is_tiny = __riscv_vmxor(x_is_tiny, x_is_tiny, vlen);

if (__riscv_vcpop(x_in_left, vlen) > 0) {
VFLOAT x_left = VFMV_VF(0x1.0p-3, vlen);
Expand All @@ -110,10 +107,8 @@ void F_VER1(API) {
0x1.0p-63, vlen);
if (__riscv_vcpop(x_is_tiny, vlen) > 0) {
VFLOAT w_hi_dummy, w_lo_dummy;
SQRTX_4_TRANSFORM(y_hi, y_lo, w_hi_dummy, w_lo_dummy, T_tiny, 0x1.0p65,
0x1.0p-65, vlen);
dummy = __riscv_vfmul(__riscv_vfcvt_f(T_tiny, vlen), 0x1.0p-2, vlen);
dummy = __riscv_vfsub(dummy, __riscv_vfcvt_f(T_left, vlen), vlen);
SQRTX_4_TRANSFORM(y_hi, y_lo, w_hi_dummy, w_lo_dummy, T_tiny, 0x1.0p64,
0x1.0p-64, vlen);
}
}
w_hi = VFMV_VF(fp_posOne, vlen);
Expand Down Expand Up @@ -200,7 +195,8 @@ void F_VER1(API) {

if (__riscv_vcpop(x_is_tiny, vlen) > 0) {
VFLOAT p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny;
ERFCINV_PQ_TINY(T_tiny, p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny, vlen);
ERFCINV_PQ_HILO_TINY(T_tiny, p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny,
vlen);
p_hi = __riscv_vmerge(p_hi, p_hi_tiny, x_is_tiny, vlen);
p_lo = __riscv_vmerge(p_lo, p_lo_tiny, x_is_tiny, vlen);
q_hi = __riscv_vmerge(q_hi, q_hi_tiny, x_is_tiny, vlen);
Expand All @@ -220,8 +216,6 @@ void F_VER1(API) {
vy = __riscv_vfsgnj(vy, vx_sign, vlen);
vy = __riscv_vmerge(vy, vy_special, special_args, vlen);

// vy = dummy;

// copy vy into y and increment addr pointers
VFSTORE_OUTARG1(vy, vlen);

Expand Down
116 changes: 116 additions & 0 deletions include/rvvlm_inverrorfuncsD.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,52 @@
#define Q_tiny_9 0x5e4a26a7c1415755 // sacle 57
#define DELTA_Q0_tiny 0x1.8a7adad44d65ap-4 // scale 66

#define Q50_84
// Using [P,Q]_tiny_[HI,LO]_k, HI in Q50, LO in Q84
#if defined(Q50_84)
#define P_tiny_HI_0 -0x8593442eL
#define P_tiny_LO_0 -0x4e7245b3L
#define P_tiny_HI_1 -0x7f3dc156b1L
#define P_tiny_LO_1 -0x1f0300096L
#define P_tiny_HI_2 -0x20dbbc67b1b8L
#define P_tiny_LO_2 -0xbc59b742L
#define P_tiny_HI_3 -0x30c75b8264d44L
#define P_tiny_LO_3 -0x18a421ab9L
#define P_tiny_HI_4 -0x1ab06f39e5addeL
#define P_tiny_LO_4 -0x180f2a477L
#define P_tiny_HI_5 -0x3d698fc2964a9cL
#define P_tiny_LO_5 0xc3d4ab0bL
#define P_tiny_HI_6 0x7f7d6a748d0c2fL
#define P_tiny_LO_6 0x1729754e9L
#define P_tiny_HI_7 0x18b100818c77848L
#define P_tiny_LO_7 0x1aca73439L
#define P_tiny_HI_8 0x4be1ed5d031774L
#define P_tiny_LO_8 -0x3b6c5afbL
#define P_tiny_HI_9 0x156a7e2e54e260L
#define P_tiny_LO_9 0x1a0c336beL

#define Q_tiny_HI_0 -0x85933cdaL
#define Q_tiny_LO_0 -0xb5b39d61L
#define Q_tiny_HI_1 -0x7f3de4b69fL
#define Q_tiny_LO_1 -0x151d1cd35L
#define Q_tiny_HI_2 -0x20dd8dc1da27L
#define Q_tiny_LO_2 -0x1706945d7L
#define Q_tiny_HI_3 -0x30dc92d1cd231L
#define Q_tiny_LO_3 0xabde03f9L
#define Q_tiny_HI_4 -0x1af5fcee397d58L
#define Q_tiny_LO_4 -0xc3530d28L
#define Q_tiny_HI_5 -0x42639eeec1d051L
#define Q_tiny_LO_5 0x662b41ecL
#define Q_tiny_HI_6 0x6182b99f6ca998L
#define Q_tiny_LO_6 0x938a5e35L
#define Q_tiny_HI_7 0x17a6848dc07624aL
#define Q_tiny_LO_7 0x8a0484b7L
#define Q_tiny_HI_8 0x105ecd6aac52b12L
#define Q_tiny_LO_8 0x1d1e38258L
#define Q_tiny_HI_9 0xbc944d4f8282afL
#define Q_tiny_LO_9 -0x155b50b48L
#endif

// erfinv(+-1) = +-Inf with divide by zero
// erfinv(x) |x| > 1, real is NaN with invalid
// erfinv(NaN) is NaN, invalid if input is signalling NaN
Expand Down Expand Up @@ -211,3 +257,73 @@
__riscv_vsub(Q, __riscv_vfcvt_x(q_hi_tiny, (vlen)), (vlen)), (vlen)); \
q_lo_tiny = __riscv_vfadd(q_lo, DELTA_Q0_tiny, (vlen)); \
} while (0)

#define UPDATE_P_LO(COEFF, T, P_HI, P_LO, P_tmp, K, vlen) \
do { \
(P_LO) = PSTEP_I_HI((COEFF), (T), (P_LO), (vlen)); \
(P_tmp) = __riscv_vmul((T), (P_HI), (vlen)); \
(P_tmp) = VSRL_I_AS_U((P_tmp), (K), (vlen)); \
(P_LO) = __riscv_vadd((P_LO), (P_tmp), (vlen)); \
} while (0)

#define ERFCINV_PQ_HILO_TINY(T, p_hi_tiny, p_lo_tiny, q_hi_tiny, q_lo_tiny, \
vlen) \
do { \
/* T is in scale of 64 */ \
VINT P_HI, P_LO, Q_HI, Q_LO, P_tmp, Q_tmp; \
\
P_HI = VMVI_VX(P_tiny_HI_9, (vlen)); \
P_LO = VMVI_VX(P_tiny_LO_9, (vlen)); \
\
UPDATE_P_LO(P_tiny_LO_8, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_8, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_7, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_7, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_6, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_6, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_5, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_5, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_4, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_4, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_3, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_3, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_2, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_2, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_1, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_1, (T), P_HI, (vlen)); \
UPDATE_P_LO(P_tiny_LO_0, (T), P_HI, P_LO, P_tmp, 30, (vlen)); \
P_HI = PSTEP_I_HI(P_tiny_HI_0, (T), P_HI, (vlen)); \
\
Q_HI = VMVI_VX(Q_tiny_HI_9, (vlen)); \
Q_LO = VMVI_VX(Q_tiny_LO_9, (vlen)); \
\
UPDATE_P_LO(Q_tiny_LO_8, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_8, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_7, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_7, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_6, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_6, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_5, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_5, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_4, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_4, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_3, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_3, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_2, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_2, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_1, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_1, (T), Q_HI, (vlen)); \
UPDATE_P_LO(Q_tiny_LO_0, (T), Q_HI, Q_LO, Q_tmp, 30, (vlen)); \
Q_HI = PSTEP_I_HI(Q_tiny_HI_0, (T), Q_HI, (vlen)); \
\
VFLOAT A = __riscv_vfcvt_f(P_HI, (vlen)); \
p_lo_tiny = __riscv_vfcvt_f(P_LO, (vlen)); \
p_hi_tiny = __riscv_vfmadd(p_lo_tiny, 0x1.0p-34, A, (vlen)); \
p_lo_tiny = __riscv_vfmadd(p_lo_tiny, 0x1.0p-34, \
__riscv_vfsub(A, p_hi_tiny, (vlen)), (vlen)); \
VFLOAT B = __riscv_vfcvt_f(Q_HI, (vlen)); \
q_lo_tiny = __riscv_vfcvt_f(Q_LO, (vlen)); \
q_hi_tiny = __riscv_vfmadd(q_lo_tiny, 0x1.0p-34, B, (vlen)); \
q_lo_tiny = __riscv_vfmadd(q_lo_tiny, 0x1.0p-34, \
__riscv_vfsub(B, q_hi_tiny, (vlen)), (vlen)); \
} while (0)
116 changes: 59 additions & 57 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,40 @@

set(TEST_SOURCES
src/test_infra.cpp
# src/test_acos.cpp
# src/test_acosI.cpp
# src/test_acospi.cpp
# src/test_acospiI.cpp
# src/test_asin.cpp
# src/test_asinI.cpp
# src/test_asinpi.cpp
# src/test_asinpiI.cpp
# src/test_atan.cpp
# src/test_atanI.cpp
# src/test_atanpi.cpp
# src/test_atanpiI.cpp
# src/test_atan2.cpp
# src/test_atan2I.cpp
# src/test_atan2pi.cpp
# src/test_atan2piI.cpp
# src/test_acosh.cpp
# src/test_acoshI.cpp
# src/test_asinh.cpp
# src/test_asinhI.cpp
# src/test_atanh.cpp
# src/test_atanhI.cpp
# src/test_cbrt.cpp
# src/test_cbrtI.cpp
# src/test_cdfnorm.cpp
# src/test_cdfnormI.cpp
src/test_acos.cpp
src/test_acosI.cpp
src/test_acospi.cpp
src/test_acospiI.cpp
src/test_asin.cpp
src/test_asinI.cpp
src/test_asinpi.cpp
src/test_asinpiI.cpp
src/test_atan.cpp
src/test_atanI.cpp
src/test_atanpi.cpp
src/test_atanpiI.cpp
src/test_atan2.cpp
src/test_atan2I.cpp
src/test_atan2pi.cpp
src/test_atan2piI.cpp
src/test_acosh.cpp
src/test_acoshI.cpp
src/test_asinh.cpp
src/test_asinhI.cpp
src/test_atanh.cpp
src/test_atanhI.cpp
src/test_cbrt.cpp
src/test_cbrtI.cpp
src/test_cdfnorm.cpp
src/test_cdfnormI.cpp
src/test_erf.cpp
src/test_erfI.cpp
src/test_erfc.cpp
src/test_erfcI.cpp
src/test_erfinv.cpp
src/test_erfinvI.cpp
src/test_erfcinv.cpp
src/test_erfcinvI.cpp
src/test_exp.cpp
src/test_expI.cpp
src/test_exp2.cpp
Expand All @@ -44,37 +46,37 @@ set(TEST_SOURCES
src/test_exp10I.cpp
src/test_expm1.cpp
src/test_expm1I.cpp
# src/test_log.cpp
# src/test_logI.cpp
# src/test_log2.cpp
# src/test_log2I.cpp
# src/test_log10.cpp
# src/test_log10I.cpp
# src/test_log1p.cpp
# src/test_log1pI.cpp
# src/test_pow.cpp
# src/test_powI.cpp
# src/test_cos.cpp
# src/test_cosI.cpp
# src/test_sin.cpp
# src/test_sinI.cpp
# src/test_cospi.cpp
# src/test_cospiI.cpp
# src/test_sinpi.cpp
# src/test_sinpiI.cpp
# src/test_sincos.cpp
# src/test_sincosI.cpp
# src/test_sincospi.cpp
# src/test_sincospiI.cpp
# src/test_tan.cpp
# src/test_tanI.cpp
# src/test_tanpi.cpp
# src/test_cosh.cpp
# src/test_coshI.cpp
# src/test_sinh.cpp
# src/test_sinhI.cpp
# src/test_tanh.cpp
# src/test_tanhI.cpp
src/test_log.cpp
src/test_logI.cpp
src/test_log2.cpp
src/test_log2I.cpp
src/test_log10.cpp
src/test_log10I.cpp
src/test_log1p.cpp
src/test_log1pI.cpp
src/test_pow.cpp
src/test_powI.cpp
src/test_cos.cpp
src/test_cosI.cpp
src/test_sin.cpp
src/test_sinI.cpp
src/test_cospi.cpp
src/test_cospiI.cpp
src/test_sinpi.cpp
src/test_sinpiI.cpp
src/test_sincos.cpp
src/test_sincosI.cpp
src/test_sincospi.cpp
src/test_sincospiI.cpp
src/test_tan.cpp
src/test_tanI.cpp
src/test_tanpi.cpp
src/test_cosh.cpp
src/test_coshI.cpp
src/test_sinh.cpp
src/test_sinhI.cpp
src/test_tanh.cpp
src/test_tanhI.cpp
)

add_executable(test_veclibm ${TEST_SOURCES})
Expand Down
Loading

0 comments on commit 31f5873

Please sign in to comment.