Skip to content

Commit

Permalink
Merge pull request #30 from rivosinc/dev/PingTakPeterTang/loggamma
Browse files Browse the repository at this point in the history
add FP64 loggamma function
  • Loading branch information
PingTakPeterTang authored Apr 10, 2024
2 parents bddd67f + e71cbc7 commit 1f8d20f
Show file tree
Hide file tree
Showing 10 changed files with 906 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ set(PROJECT_SOURCES
src/rvvlm_sinhDI.c
src/rvvlm_tanhD.c
src/rvvlm_tanhDI.c
src/rvvlm_lgammaD.c
src/rvvlm_lgammaDI.c
src/rvvlm_tgammaD.c
src/rvvlm_tgammaDI.c
)
Expand Down
36 changes: 36 additions & 0 deletions include/rvvlm.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,21 @@ union sui64_fp64 {
(prod_lo) = __riscv_vfmsub((x), (y), (prod_hi), (vlen)); \
} while (0)

#define PROD_X1Y2(x, y_hi, y_lo, prod_hi, prod_lo, vlen) \
do { \
(prod_hi) = __riscv_vfmul((x), (y_hi), (vlen)); \
(prod_lo) = __riscv_vfmsub((x), (y_hi), (prod_hi), (vlen)); \
(prod_lo) = __riscv_vfmacc((prod_lo), (x), (y_lo), (vlen)); \
} while (0)

#define PROD_X2Y2(x_hi, x_lo, y_hi, y_lo, prod_hi, prod_lo, vlen) \
do { \
(prod_hi) = __riscv_vfmul((x_hi), (y_hi), (vlen)); \
(prod_lo) = __riscv_vfmsub((x_hi), (y_hi), (prod_hi), (vlen)); \
(prod_lo) = __riscv_vfmacc((prod_lo), (x_hi), (y_lo), (vlen)); \
(prod_lo) = __riscv_vfmacc((prod_lo), (x_lo), (y_hi), (vlen)); \
} while (0)

#define DIV_N1D2(numer, denom, delta_d, Q, q, vlen) \
do { \
Q = __riscv_vfdiv((numer), (denom), (vlen)); \
Expand Down Expand Up @@ -172,6 +187,16 @@ union sui64_fp64 {
(delta_Q) = __riscv_vfmul(_q, __riscv_vfrec7((denom), (vlen)), (vlen)); \
} while (0)

#define ACC_DIV2_N1D2(numer, denom, delta_d, Q, delta_Q, vlen) \
do { \
VFLOAT _recip, _q; \
_recip = __riscv_vfrdiv((denom), 0x1.0p0, (vlen)); \
(Q) = __riscv_vfmul((numer), _recip, (vlen)); \
_q = __riscv_vfnmsub((Q), (denom), (numer), (vlen)); \
_q = __riscv_vfnmsac(_q, (Q), (delta_d), (vlen)); \
(delta_Q) = __riscv_vfmul(_q, _recip, (vlen)); \
} while (0)

#define ACC_DIV2_N2D2(numer, delta_n, denom, delta_d, Q, delta_Q, vlen) \
do { \
VFLOAT _recip, _q; \
Expand Down Expand Up @@ -480,6 +505,13 @@ union sui64_fp64 {
#define RVVLM_TANPIDI_VSET_CONFIG "rvvlm_fp64m2.h"
#define RVVLM_TANPIDI_MERGED rvvlm_tanpiI

// FP64 lgamma function configuration
#define RVVLM_LGAMMAD_VSET_CONFIG "rvvlm_fp64m1.h"
#define RVVLM_LGAMMAD_STD rvvlm_lgamma

#define RVVLM_LGAMMADI_VSET_CONFIG "rvvlm_fp64m1.h"
#define RVVLM_LGAMMADI_STD rvvlm_lgammaI

// FP64 tgamma function configuration
#define RVVLM_TGAMMAD_VSET_CONFIG "rvvlm_fp64m1.h"
#define RVVLM_TGAMMAD_STD rvvlm_tgamma
Expand Down Expand Up @@ -722,6 +754,10 @@ void RVVLM_TANHD_STD(size_t x_len, const double *x, double *y);
void RVVLM_TANHDI_STD(size_t x_len, const double *x, size_t stride_x, double *y,
size_t stride_y);

void RVVLM_LGAMMAD_STD(size_t x_len, const double *x, double *y);
void RVVLM_LGAMMADI_STD(size_t x_len, const double *x, size_t stride_x,
double *y, size_t stride_y);

void RVVLM_TGAMMAD_STD(size_t x_len, const double *x, double *y);
void RVVLM_TGAMMADI_STD(size_t x_len, const double *x, size_t stride_x,
double *y, size_t stride_y);
Expand Down
570 changes: 570 additions & 0 deletions include/rvvlm_lgammaD.inc.h

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions src/rvvlm_lgammaD.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: 2023 Rivos Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <riscv_vector.h>
#include <stdio.h>

#include "rvvlm.h"
#define API_SIGNATURE API_SIGNATURE_11
#define STRIDE UNIT_STRIDE

#include RVVLM_LGAMMAD_VSET_CONFIG

#include "rvvlm_gammafuncsD.h"

#include "rvvlm_lgammaD.inc.h"
16 changes: 16 additions & 0 deletions src/rvvlm_lgammaDI.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: 2023 Rivos Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <riscv_vector.h>
#include <stdio.h>

#include "rvvlm.h"
#define API_SIGNATURE API_SIGNATURE_11
#define STRIDE GENERAL_STRIDE

#include RVVLM_LGAMMADI_VSET_CONFIG

#include "rvvlm_gammafuncsD.h"

#include "rvvlm_lgammaD.inc.h"
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ set(TEST_SOURCES
src/test_sinhI.cpp
src/test_tanh.cpp
src/test_tanhI.cpp
src/test_lgamma.cpp
src/test_lgammaI.cpp
src/test_tgamma.cpp
src/test_tgammaI.cpp
)
Expand Down
7 changes: 7 additions & 0 deletions test/include/test_infra.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ void report_err_fp64(void (*test_func)(size_t, const double *, double *,
void report_err_fp64(void (*test_func)(size_t, const double *, double *),
long double (*ref_func)(long double), const double *, int);

void report_mixederr_fp64(void (*test_func)(size_t, const double *, double *),
long double (*ref_func)(long double), double, double,
int, double = 1.0);

// Second most common interface: testing on 1 interval
// for 1-in-1-out general-stride function
void report_err_fp64(void (*test_func)(size_t, const double *, size_t, double *,
Expand Down Expand Up @@ -106,3 +110,6 @@ long double stirling_power(long double);
long double stirling_correction(long double);
long double tgammal_mod(long double);
long double sinpix_by_pi(long double);
long double lgammap1l(long double);
long double log_stirling(long double);
long double log_stirling_correction(long double);
96 changes: 96 additions & 0 deletions test/src/test_infra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,80 @@ void report_err_fp64(double (*test_func)(double), double (*ref_func)(double),
printf("Maximum observed ULP error is %3.3lf\n", max_ulp_err);
}

void report_mixederr_fp64(void (*test_func)(size_t, const double *, double *),
long double (*ref_func)(long double), double start,
double end, int nb_pts, double threshold) {

long double y_ref;
double *x, *y, delta;
long double abs_err, rel_err, ulp_err, mixed_err;
long double max_abs_err, max_rel_err, max_ulp_err, max_mixed_err;

x = (double *)malloc(nb_pts * sizeof(double));
y = (double *)malloc(nb_pts * sizeof(double));

if (nb_pts <= 1) {
delta = 0.0;
nb_pts = 1;
} else {
delta = (end - start) / (double)(nb_pts - 1);
}

max_abs_err = 0.0;
max_rel_err = 0.0;
max_ulp_err = 0.0;
max_mixed_err = 0.0;

// fill up the set of test points
for (int i = 0; i < nb_pts; ++i) {
x[i] = start + (double)i * delta;
}

// call function under test
test_func((size_t)nb_pts, x, y);

// now for each point we compute error and log the max
for (int j = 0; j < nb_pts; j++) {
y_ref = ref_func((long double)x[j]);
abs_err = (long double)y[j] - y_ref;
abs_err = ABS(abs_err);
if (ABS((double)y_ref) > 0.0) {
rel_err = abs_err / ABS((double)y_ref);
} else {
rel_err = abs_err / 0x1.0p-1074;
}
ulp_err = abs_err / ulp_64((double)y_ref);

mixed_err = MIN(ulp_err, abs_err * 0x1.0p+52);

max_abs_err = MAX(max_abs_err, abs_err);
max_rel_err = MAX(max_rel_err, rel_err);
max_ulp_err = MAX(max_ulp_err, ulp_err);
max_mixed_err = MAX(max_mixed_err, mixed_err);

if (VERBOSE) {
union sui64_fp64 xxx, yyy;
xxx.f = x[j];
yyy.f = y[j];
printf("--input %24.17le, 0x%016lx, output %24.17le, 0x%016lx \n", xxx.f,
xxx.ui, yyy.f, yyy.ui);
printf(" reference %24.17le\n\n", (double)y_ref);
}
}
printf("----------------------------\n");
if ((ABS(start) > 100.) || (ABS(end) < 1.e-2)) {
printf("Tested %d points in [%8.3le, %8.3le]\n", nb_pts, start, end);
} else {
printf("Tested %d points in [%3.3lf, %3.3lf]\n", nb_pts, start, end);
}
printf("Maximum observed mixed error is %8.3Le\n", max_mixed_err);

EXPECT_LT((double)max_mixed_err, threshold);

free(x);
free(y);
}

void report_err2_fp64(void (*test_func)(size_t, const double *, const double *,
double *),
long double (*ref_func)(long double, long double),
Expand Down Expand Up @@ -1350,3 +1424,25 @@ long double sinpix_by_pi(long double x) {
y = -sinl(y) / pi;
return y;
}
long double lgammap1l(long double x) {
long double one = 1.0L;
long double y = x + one;
y = lgammal(y);
return y;
}
long double log_stirling(long double x) {
long double e = 2.7182818284590452353602874713526615L;
long double half = 0x1.0p-1L;
long double x_minus_half = x - half;
long double y = x / e;
y = (x - half) * logl(y);
return y;
}
long double log_stirling_correction(long double x) {
long double log_stir = log_stirling(x);
long double y = lgammal(x) - log_stir;
return y;
}
137 changes: 137 additions & 0 deletions test/src/test_lgamma.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// SPDX-FileCopyrightText: 2023 Rivos Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <gtest/gtest.h>
#include <math.h>

#include "rvvlm.h"
#include "test_infra.h"

TEST(lgamma, special) {
unsigned long nb_tests;
double x_start, x_end;

COMMENT("lgamma: current chosen algorithm; reduced argument in FP64 only")

show_special_fp64(rvvlm_lgamma, "Special Value handling of this function");
}

TEST(lgamma, lgamma_neg) {
unsigned long nb_tests;
double x_start, x_end;

x_start = -0.999;
x_end = -0.0001;
nb_tests = 5000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = -1.99;
x_end = -1.01;
nb_tests = 5000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = -2.99;
x_end = -2.01;
nb_tests = 5000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = -3.99;
x_end = -3.01;
nb_tests = 5000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = -4.99;
x_end = -4.01;
nb_tests = 5000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = -5.99;
x_end = -5.01;
nb_tests = 5000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = -6.9999;
x_end = -6.00001;
nb_tests = 10000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = -1000.9999;
x_end = -1000.0001;
nb_tests = 10000;
report_mixederr_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);
}
TEST(lgamma, lgamma_left) {
unsigned long nb_tests;
double x_start, x_end;

x_start = 0x1.0p-1070;
x_end = 0x1.0p-1065;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 0x1.0p-62;
x_end = 0x1.0p-58;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 0x1.0p-58;
x_end = 0x1.0p-40;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 0x1.0p-4;
x_end = 0x1.0p-1;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 0x1.0p-1;
x_end = 0x1.ffffffffp-1;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 1.0;
x_end = 0x1.ffffffffp0;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 0x1.0p1;
x_end = 0x1.ffffffffp1;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 0x1.0p2;
x_end = 0x1.dfffffffp-1;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);
}

TEST(lgamma, lgamma_right) {
unsigned long nb_tests;
double x_start, x_end;

x_start = 2.25;
x_end = 10.0;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 10.0;
x_end = 100.0;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);
}

TEST(lgamma, lgamma_large) {
unsigned long nb_tests;
double x_start, x_end;

x_start = 0x1.0p800;
x_end = 0x1.0p802;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);

x_start = 0x1.1p1012;
x_end = 0x1.fffffp1012;
nb_tests = 5000;
report_err_fp64(rvvlm_lgamma, lgammal, x_start, x_end, nb_tests);
}
Loading

0 comments on commit 1f8d20f

Please sign in to comment.