From fa8efdc133054a0416e8ea8666ccf0090b635073 Mon Sep 17 00:00:00 2001 From: Andriy Plokhotnyuk Date: Fri, 16 Jun 2023 09:53:15 +0200 Subject: [PATCH 01/29] Speed up `JavaImpl.canonizeFinite` It speeds up `Decimal64Utils.canonize` by ~1.5x on Intel CPUs --- .../java/com/epam/deltix/dfp/JavaImpl.java | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java index c6085f16..029e395d 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java @@ -161,15 +161,35 @@ public static long canonizeFinite(final long value) { if (coefficient == 0) return ZERO; - long div10 = coefficient / 10; - if (div10 * 10 != coefficient) - return value; - - do { - coefficient = div10; - div10 /= 10; - ++exponent; - } while (div10 * 10 == coefficient); + if ((int) coefficient == coefficient) { + long p = coefficient * 3435973837L; + if ((p & 0x780000000L) != 0) + return value; + do { + coefficient = p >> 35; + p = coefficient * 3435973837L; + ++exponent; + } while ((p & 0x780000000L) == 0); + } else { + long div10 = coefficient / 10; + if (div10 * 10 != coefficient) + return value; + do { + if ((int) div10 == div10) { + long p; + do { + coefficient = div10; + p = coefficient * 3435973837L; + div10 = p >> 35; + ++exponent; + } while ((p & 0x780000000L) == 0); + break; + } + coefficient = div10; + div10 /= 10; + ++exponent; + } while (div10 * 10 == coefficient); + } return pack(signMask, exponent, coefficient, BID_ROUNDING_TO_NEAREST); } From d65e285bed059e0ce4d4fcae8c975383116582f5 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Thu, 22 Jun 2023 23:56:09 +0300 Subject: [PATCH 02/29] Enhance fast div10 code.
--- .../java/com/epam/deltix/dfp/JavaImpl.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java index 029e395d..4ae4cb3e 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java @@ -29,6 +29,12 @@ private JavaImpl() { public static final int MIN_EXPONENT = -383; public static final int MAX_EXPONENT = 384; + // See the https://www.agner.org/optimize/optimizing_assembly.pdf part 16.8 Division (all processors) + public static final long FAST_DIV10_RECIPROCAL = 0xCCCCCCCDL; // (2^FAST_DIV10_SHIFT) / 10 + public static final int FAST_DIV10_SHIFT = 35; + public static final long FAST_DIV10_MUL10_MASK = 0x780000000L; // Highest nibble shifted out by FAST_DIV10_SHIFT + // (((1L << FAST_DIV10_SHIFT) - 1) >> (FAST_DIV10_SHIFT - 4)) << (FAST_DIV10_SHIFT - 4) + public static long fromInt32(final int value) { final long longValue = value; // Fixes -Integer.MIN_VALUE return value >= 0 ? 
(0x31C00000L << 32) | longValue : (0xB1C00000L << 32) | -longValue; @@ -162,14 +168,14 @@ public static long canonizeFinite(final long value) { return ZERO; if ((int) coefficient == coefficient) { - long p = coefficient * 3435973837L; - if ((p & 0x780000000L) != 0) + long p = coefficient * FAST_DIV10_RECIPROCAL; + if ((p & FAST_DIV10_MUL10_MASK) != 0) return value; do { - coefficient = p >> 35; - p = coefficient * 3435973837L; + coefficient = p >> FAST_DIV10_SHIFT; + p = coefficient * FAST_DIV10_RECIPROCAL; ++exponent; - } while ((p & 0x780000000L) == 0); + } while ((p & FAST_DIV10_MUL10_MASK) == 0); } else { long div10 = coefficient / 10; if (div10 * 10 != coefficient) @@ -179,10 +185,10 @@ public static long canonizeFinite(final long value) { long p; do { coefficient = div10; - p = coefficient * 3435973837L; - div10 = p >> 35; + p = coefficient * FAST_DIV10_RECIPROCAL; + div10 = p >> FAST_DIV10_SHIFT; ++exponent; - } while ((p & 0x780000000L) == 0); + } while ((p & FAST_DIV10_MUL10_MASK) == 0); break; } coefficient = div10; From b8654f300897d9e869b4b906759d67f5421aff21 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Thu, 22 Jun 2023 23:59:32 +0300 Subject: [PATCH 03/29] Enhance CanonizeBenchmark with fast div10 operation. 
--- .../epam/deltix/dfp/CanonizeBenchmark.java | 59 +++++++++++++++++-- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java index 8652a438..cea1d978 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java @@ -1,6 +1,7 @@ package com.epam.deltix.dfp; import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; @@ -8,6 +9,8 @@ import java.util.concurrent.TimeUnit; +import static com.epam.deltix.dfp.JavaImpl.*; + @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Warmup(time = 2, iterations = 4) @@ -15,18 +18,64 @@ @State(Scope.Thread) @Fork(3) public class CanonizeBenchmark { - /* 10., 1000_000., 123.456789123, 1.23, null(=NaN) */ - @Param({"3584865303386914826", "3584865303387914816", "3503800633551035011", "3566850904877432955", "-128"}) + /* 10., 1000_000., 123.456789123, 1.23, null(=NaN), 0., 1., 1000., 1000000., 1000000000. 
*/ + @Param({"3584865303386914826", "3584865303387914816", "3503800633551035011", "3566850904877432955", "-128", + "3584865303386914816", "3584865303386914817", "3584865303386915816", "3584865303387914816", "3584865304386914816"}) private long decimalValue; @Benchmark - public long canonize() { - return Decimal64Utils.canonize(decimalValue); + public void canonize(Blackhole bh) { + bh.consume(Decimal64Utils.canonize(decimalValue)); + } + + @Benchmark + public void canonizeOrig(Blackhole bh) { + bh.consume(canonizeFiniteOrig(decimalValue)); + } + + public static long canonizeFiniteOrig(final long value) { + final long signMask = value & MASK_SIGN; + long coefficient; + int exponent; + + if (isSpecial(value)) { + assert (isFinite(value)); + + // Check for non-canonical values. + final long x = (value & LARGE_COEFFICIENT_MASK) | LARGE_COEFFICIENT_HIGH_BIT; + coefficient = x > MAX_COEFFICIENT ? 0 : x; + + // Extract exponent. + final long tmp = value >> EXPONENT_SHIFT_LARGE; + exponent = (int) (tmp & EXPONENT_MASK); + } else { + // Extract coefficient. + coefficient = (value & SMALL_COEFFICIENT_MASK); + + // Extract exponent. 
Maximum biased value for "small exponent" is 0x2FF(*2=0x5FE), signed: [] + // upper 1/4 of the mask range is "special", as checked in the code above + final long tmp = value >> EXPONENT_SHIFT_SMALL; + exponent = (int) (tmp & EXPONENT_MASK); + } + + if (coefficient == 0) + return ZERO; + + long div10 = coefficient / 10; + if (div10 * 10 != coefficient) + return value; + + do { + coefficient = div10; + div10 /= 10; + ++exponent; + } while (div10 * 10 == coefficient); + return pack(signMask, exponent, coefficient, BID_ROUNDING_TO_NEAREST); } public static void main(final String[] args) throws RunnerException { final Options opt = new OptionsBuilder() - .include(".*" + UnaryOperationBenchmark.class.getSimpleName() + ".*") + .include(".*" + CanonizeBenchmark.class.getSimpleName() + ".*") .forks(1) .build(); new Runner(opt).run(); From b09168b72d7cb82082c4f4e2513e016d8ddc5583 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Fri, 23 Jun 2023 00:03:57 +0300 Subject: [PATCH 04/29] Add test for fast div10 acceptable range. 
--- .../test/java/com/epam/deltix/dfp/JavaImplTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java b/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java index 7fdb010b..dc24ded4 100644 --- a/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java +++ b/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java @@ -898,4 +898,17 @@ public void unsignedReplacementTest() { assertEquals(sig_x > 9999999999999999L, UnsignedLong.isGreater(sig_x, 9999999999999999L)); assertEquals(sig_x < Long.MAX_VALUE, UnsignedLong.isLess(sig_x, Long.MAX_VALUE)); } + + @Test + public void div10Test() { + final long coefficient = Long.MAX_VALUE / FAST_DIV10_RECIPROCAL; // Critical point + assertTrue(coefficient > Integer.MAX_VALUE); + + final long r = coefficient / 10; + + long p = coefficient * FAST_DIV10_RECIPROCAL; + final long coefficient10 = p >> FAST_DIV10_SHIFT; + + assertEquals(r, coefficient10); + } } From f39cd5ee4d41ddc7d9532c58c49a115de57370fc Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Fri, 23 Jun 2023 00:19:05 +0300 Subject: [PATCH 05/29] Java: Speedup fromDecimalDouble --- .../src/main/java/com/epam/deltix/dfp/JavaImpl.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java index 4ae4cb3e..175fcc20 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java @@ -341,6 +341,17 @@ public static long fromDecimalDouble(final double x) { return y; for (long n = m; ; ) { + if ((int) n == n) { + long p; + while (true) { + p = n * FAST_DIV10_RECIPROCAL; + final long m10 = n >> FAST_DIV10_SHIFT; + if ((p & FAST_DIV10_MUL10_MASK) != 0) + return signAndExp + n; + n = m10; + signAndExp += 1L << EXPONENT_SHIFT_SMALL; + } + } final long m10 = n / 10; if (m10 * 10 != n) return signAndExp 
+ n; From 698c6ec05bf869e1f29f1af5c89c30fca240a0db Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Fri, 23 Jun 2023 00:27:16 +0300 Subject: [PATCH 06/29] Java: Add fromDecimalDouble benchmark. --- .../com/epam/deltix/dfp/MathBenchmark.java | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java index d7923f1c..6cb857aa 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java @@ -9,6 +9,9 @@ import java.util.concurrent.TimeUnit; +import static com.epam.deltix.dfp.JavaImpl.*; +import static com.epam.deltix.dfp.JavaImpl.EXPONENT_SHIFT_SMALL; + @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Warmup(time = 3, iterations = 1) @@ -16,14 +19,18 @@ @State(Scope.Thread) public class MathBenchmark { private long[] decimalValues; + private double[] doubleValues; public static int fixedSeed = 42 * 42 * 42 * 42 * 42; @Setup public void setUp() { TestUtils.RandomDecimalsGenerator generator = new TestUtils.RandomDecimalsGenerator(fixedSeed); decimalValues = new long[1004]; - for (int i = 0; i < decimalValues.length; ++i) + doubleValues = new double[decimalValues.length]; + for (int i = 0; i < decimalValues.length; ++i) { decimalValues[i] = generator.nextX(); + doubleValues[i] = Decimal64Utils.toDouble(decimalValues[i]); + } } @Benchmark @@ -242,6 +249,59 @@ public void fdimOld(Blackhole bh) { // bh.consume(NativeImpl.bid64Fdim(decimalValues[i], Decimal64Utils.negate(decimalValues[i + 1]))); // } + @Benchmark + public void fromDecimalDouble(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.fromDecimalDouble(doubleValues[i])); + } + + @Benchmark + public void fromDecimalDoubleOrig(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + 
bh.consume(fromDecimalDoubleOrig(doubleValues[i])); + } + + public static long fromDecimalDoubleOrig(final double x) { + final long y = Decimal64Utils.fromDouble(x); + long m, signAndExp; + + // Odd + special encoding(16 digits) + final long notY = ~y; + if ((MASK_SPECIAL & notY) == 0) { + if ((MASK_INFINITY_AND_NAN & notY) == 0) + return y; + + m = (y & LARGE_COEFFICIENT_MASK) + LARGE_COEFFICIENT_HIGH_BIT; + signAndExp = ((y << 2) & EXPONENT_MASK_SMALL) + (y & MASK_SIGN); + } else { + m = y & SMALL_COEFFICIENT_MASK; + // 16 digits + odd + signAndExp = y & (-1L << EXPONENT_SHIFT_SMALL); + if (m <= MAX_COEFFICIENT / 10 + 1) + return y; + } + + if ((y & 1) == 0) + return y; + // NeedAdjustment + // Check the last digit + final long m1 = m + 1; + m = m1 / 10; + if (m1 - m * 10 > 2) + return y; + + signAndExp += 1L << EXPONENT_SHIFT_SMALL; + if (Decimal64Utils.toDouble(signAndExp + m) != x) + return y; + + for (long n = m; ; ) { + final long m10 = n / 10; + if (m10 * 10 != n) + return signAndExp + n; + n = m10; + signAndExp += 1L << EXPONENT_SHIFT_SMALL; + } + } public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() From 29c107032d7b8fffd5818c73b0a60f17717675f5 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Fri, 23 Jun 2023 00:45:25 +0300 Subject: [PATCH 07/29] Fix fast division typo. 
--- java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java index 175fcc20..7868887c 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java @@ -345,7 +345,7 @@ public static long fromDecimalDouble(final double x) { long p; while (true) { p = n * FAST_DIV10_RECIPROCAL; - final long m10 = n >> FAST_DIV10_SHIFT; + final long m10 = p >> FAST_DIV10_SHIFT; if ((p & FAST_DIV10_MUL10_MASK) != 0) return signAndExp + n; n = m10; From 0552325f69c42ed6b1d379f69aaca0f4c1f38f7f Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Fri, 23 Jun 2023 15:20:39 +0300 Subject: [PATCH 08/29] Java: Tiny speedup: replace UnsignedLong.isGreater(x, 0) with x!=0 (Enhancement by plokhotnyuk) --- .../src/main/java/com/epam/deltix/dfp/JavaImplCmp.java | 10 +++++----- .../main/java/com/epam/deltix/dfp/JavaImplMinMax.java | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java index af75b1e4..1590af7d 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java @@ -190,7 +190,7 @@ public static int compare(final long /*BID_UINT64*/ x, final long /*BID_UINT64*/ } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (!x_mask_sign)) ? 
1 : -1; // @AD: TODO: Check this case carefully } @@ -418,7 +418,7 @@ public static boolean bid64_quiet_greater(final long /*BID_UINT64*/ x, final lon if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { return false; } - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || UnsignedLong.isGreater(sig_n_prime_w0, sig_y)) ^ (x_mask_sign)); } // adjust the y significand upwards @@ -614,7 +614,7 @@ public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (!x_mask_sign)); } @@ -782,7 +782,7 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (x_mask_sign)); } @@ -949,7 +949,7 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (x_mask_sign)); } diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java index 46fa4ea2..9154bece 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java @@ -169,7 +169,7 @@ public static long bid64_min_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ return y; } - return 
((UnsignedLong.isGreater(sig_n_prime_w1, 0) + return ((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0) || UnsignedLong.isGreater(sig_n_prime_w0, sig_y)) ^ ((x & MASK_SIGN) == MASK_SIGN)) ? y : x; } // adjust the y significand upwards @@ -361,7 +361,7 @@ public static long bid64_max_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { return y; } - return ((UnsignedLong.isGreater(sig_n_prime_w1, 0) + return ((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0) || UnsignedLong.isGreater(sig_n_prime_w0, sig_y)) ^ ((x & MASK_SIGN) == MASK_SIGN)) ? x : y; } // adjust the y significand upwards From fb66141648bcd9dc8184edfa0a19cb9a33d9cc64 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Mon, 26 Jun 2023 22:47:28 +0300 Subject: [PATCH 09/29] Java: Initial version of the multi-release JAR Java: Update to minimal Java 1.8 --- gradle/wrapper/gradle-wrapper.properties | 2 +- java/dfp/build.gradle | 43 ++- .../java/com/epam/deltix/dfp/Mul64Impl.java | 30 ++ .../java18/com/epam/deltix/dfp/Mul64Impl.java | 11 + .../java9/com/epam/deltix/dfp/Mul64Impl.java | 15 + .../com/epam/deltix/dfp/JavaImplTest.java | 83 +++++ .../com/epam/deltix/dfp/MathBenchmark.java | 290 ++---------------- 7 files changed, 210 insertions(+), 264 deletions(-) create mode 100644 java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java create mode 100644 java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java create mode 100644 java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 2e6e5897..fae08049 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip 
zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/java/dfp/build.gradle b/java/dfp/build.gradle index 33501ae1..c217825f 100644 --- a/java/dfp/build.gradle +++ b/java/dfp/build.gradle @@ -9,10 +9,33 @@ if (isReleaseVersion) { group = 'com.epam.deltix' -sourceCompatibility = 1.7 -compileTestJava { - sourceCompatibility = "1.8" - targetCompatibility = "1.8" +sourceSets { + java9 { + java { + srcDirs = ['src/main/java9'] + } + } + + java18 { + java { + srcDirs = ['src/main/java18'] + } + } +} + +compileJava { + sourceCompatibility = 8 + targetCompatibility = 8 +} + +compileJava9Java { + sourceCompatibility = 9 + targetCompatibility = 9 +} + +compileJava18Java { + sourceCompatibility = 18 + targetCompatibility = 18 } repositories { @@ -25,6 +48,9 @@ java { } dependencies { + java9Implementation files(sourceSets.main.output.classesDirs) { builtBy compileJava } + java18Implementation files(sourceSets.main.output.classesDirs) { builtBy compileJava } + testImplementation project(':java:testUtils') testImplementation 'junit:junit:4.+' testImplementation 'org.apache.commons:commons-math3:3.6.1' @@ -97,8 +123,15 @@ if (isReleaseVersion) { } jar { + into('META-INF/versions/9') { + from sourceSets.java9.output + } + into('META-INF/versions/18') { + from sourceSets.java18.output + } manifest { - attributes 'Implementation-Title': 'Deltix Decimal Floating-Point Arithmetic Library', + attributes 'Multi-Release': 'true', + 'Implementation-Title': 'Deltix Decimal Floating-Point Arithmetic Library', 'Implementation-Version': archiveVersion, 'Main-Class': 'com.epam.deltix.dfp.Demo' } diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java new file mode 100644 index 00000000..1d0f8346 --- /dev/null +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java @@ -0,0 +1,30 @@ +package com.epam.deltix.dfp; + +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; + +public class 
Mul64Impl { + public static long multiplyHigh(final long __CX, final long __CY) { + //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); + { + long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; + __CXH = __CX >>> 32; + __CXL = LONG_LOW_PART & __CX; + __CYH = __CY >>> 32; + __CYL = LONG_LOW_PART & __CY; + + __PM = __CXH * __CYL; + __PH = __CXH * __CYH; + __PL = __CXL * __CYL; + __PM2 = __CXL * __CYH; + __PH += (__PM >>> 32); + __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); + + return /*w1 =*/ __PH + (__PM >>> 32); +// _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + } + } + + public static long unsignedMultiplyHigh(final long A, final long T) { + return multiplyHigh(A, T); + } +} diff --git a/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java new file mode 100644 index 00000000..8f9c6d52 --- /dev/null +++ b/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java @@ -0,0 +1,11 @@ +package com.epam.deltix.dfp; + +public class Mul64Impl { + public static long multiplyHigh(final long A, final long T) { + return Math.multiplyHigh(A, T); + } + + public static long unsignedMultiplyHigh(final long A, final long T) { + return Math.unsignedMultiplyHigh(A, T); + } +} diff --git a/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java new file mode 100644 index 00000000..94e8c68b --- /dev/null +++ b/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java @@ -0,0 +1,15 @@ +package com.epam.deltix.dfp; + +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; + +public class Mul64Impl { + public static long multiplyHigh(final long A, final long T) { + return Math.multiplyHigh(A, T); + } + + public static long unsignedMultiplyHigh(final long x, final long y) { + return Math.multiplyHigh(x, y) + + (y & (x >> 63)) + // equivalent to `if (x < 0) result += y;` + (x & (y >> 63)); // equivalent to `if (y < 0) result += x;` + } +} 
diff --git a/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java b/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java index dc24ded4..36767372 100644 --- a/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java +++ b/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java @@ -15,6 +15,7 @@ import java.util.Random; import static com.epam.deltix.dfp.JavaImpl.*; +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; import static com.epam.deltix.dfp.JavaImplCmp.MASK_BINARY_SIG2; import static com.epam.deltix.dfp.JavaImplCmp.MASK_BINARY_OR2; import static com.epam.deltix.dfp.TestUtils.*; @@ -911,4 +912,86 @@ public void div10Test() { assertEquals(r, coefficient10); } + + @Test + public void mul0() { + final long[] testValues = new long[]{ + 0, 4, 5, 10, 1153, 1155, + Integer.MAX_VALUE - 11, Integer.MAX_VALUE - 1, + Integer.MAX_VALUE, 0x80000000L, + 0x80000001L, 0x80000007L, + Long.MAX_VALUE - 13, Long.MAX_VALUE - 1, + Long.MAX_VALUE, 0x8000000000000000L, + 0x8000000000000001L, 0x8000000000000011L, + 0xFFFFFFFFFFFFFFFCL, 0xFFFFFFFFFFFFFFFFL, + }; + + final boolean[] doOp = new boolean[]{false, true}; + + for (final long au : testValues) + for (final long bu : testValues) + for (final boolean negA : doOp) + for (final boolean negB : doOp) + for (final boolean shrA : doOp) + for (final boolean shrB : doOp) { + + final long a = prepareArg(au, negA, shrA); + final long b = prepareArg(bu, negB, shrB); + + final long w1, w0; + { + long __CX = a; + long __CY = b; + long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; + __CXH = __CX >>> 32; + __CXL = LONG_LOW_PART & __CX; + __CYH = __CY >>> 32; + __CYL = LONG_LOW_PART & __CY; + + __PM = __CXH * __CYL; + __PH = __CXH * __CYH; + __PL = __CXL * __CYL; + __PM2 = __CXL * __CYH; + __PH += (__PM >>> 32); + __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); + + w1 = __PH + (__PM >>> 32); + w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + } + final BigInteger rOld = 
unsignedLongToBigInteger(w1).multiply(twoPow64).add(unsignedLongToBigInteger(w0)); + + final BigInteger ab = unsignedLongToBigInteger(a); + final BigInteger bb = unsignedLongToBigInteger(b); + final BigInteger rb = ab.multiply(bb); + + if (!rb.equals(rOld)) + throw new RuntimeException("The case " + a + " * " + b + " result " + rb + " != " + rOld); + + final long m1 = Mul64Impl.unsignedMultiplyHigh(a, b); + final long m0 = a * b; + + if (w1 != m1 || w0 != m0) + throw new RuntimeException("The case " + Long.toHexString(a) + " * " + Long.toHexString(b) + " result [" + + Long.toHexString(w1) + ", " + Long.toHexString(w0) + "] != [" + + Long.toHexString(m1) + ", " + Long.toHexString(m0) + "]"); + } + } + + private static long prepareArg(long x, final boolean negX, final boolean shrX) { + if (negX) + x = -x; + if (shrX) + x = x >>> 1; + return x; + } + + private static final BigInteger twoPow64 = unsignedLongToBigInteger(0x100000000L).multiply(unsignedLongToBigInteger(0x100000000L)); + + private static BigInteger unsignedLongToBigInteger(long x) { + final byte[] p = new byte[9]; + for (int i = 0; i < p.length; ++i, x = x >>> 8) + p[p.length - 1 - i] = (byte) (x & 0xFF); + + return new BigInteger(p); + } } diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java index 6cb857aa..dfd4e067 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java @@ -11,6 +11,7 @@ import static com.epam.deltix.dfp.JavaImpl.*; import static com.epam.deltix.dfp.JavaImpl.EXPONENT_SHIFT_SMALL; +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @@ -34,272 +35,45 @@ public void setUp() { } @Benchmark - public void fmaNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - 
bh.consume(NativeImpl.multiplyAndAdd(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2])); - } - - @Benchmark - public void fma(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.multiplyAndAdd(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2])); - } - - @Benchmark - public void multiplyByInt32Native(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.multiplyByInt32(decimalValues[i], (int) decimalValues[i + 1])); - } - - @Benchmark - public void multiplyByInt32(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.multiplyByInteger(decimalValues[i], (int) decimalValues[i + 1])); - } - - @Benchmark - public void multiplyByInt64Native(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.multiplyByInt64(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void multiplyByInt64(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.multiplyByInteger(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void divideByInt32Native(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.divideByInt32(decimalValues[i], (int) decimalValues[i + 1])); - } - - @Benchmark - public void divideByInt32(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.divideByInteger(decimalValues[i], (int) decimalValues[i + 1])); - } - - @Benchmark - public void divideByInt64Native(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.divideByInt64(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void divideByInt64(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.divideByInteger(decimalValues[i], decimalValues[i + 1])); - } + public void mul0(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + long __CX = decimalValues[i]; + long __CY = decimalValues[i + 1]; + long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; + __CXH 
= __CX >>> 32; + __CXL = LONG_LOW_PART & __CX; + __CYH = __CY >>> 32; + __CYL = LONG_LOW_PART & __CY; - @Benchmark - public void mean2Native(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.mean2(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void mean2(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.mean(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void minNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.min2(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void min(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.min(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void maxNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.max2(decimalValues[i], decimalValues[i + 1])); - } + __PM = __CXH * __CYL; + __PH = __CXH * __CYH; + __PL = __CXL * __CYL; + __PM2 = __CXL * __CYH; + __PH += (__PM >>> 32); + __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - @Benchmark - public void max(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.max(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void max4Native(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.max4(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2], decimalValues[i + 3])); - } - - @Benchmark - public void max4(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.max(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2], decimalValues[i + 3])); - } - - @Benchmark - public void divNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.divide(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void div(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.divide(decimalValues[i], decimalValues[i + 1])); - } - - 
@Benchmark - public void div2(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(JavaImplDiv.div2(decimalValues[i])); - } - - @Benchmark - public void mulNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.multiply2(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void mul(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.multiply(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void addNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.add2(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void add(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.add(decimalValues[i], decimalValues[i + 1])); - } - - @Benchmark - public void nextUpNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.nextUp(decimalValues[i])); - } - - @Benchmark - public void nextUp(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.nextUp(decimalValues[i])); - } - - @Benchmark - public void nextDownNative(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(NativeImpl.nextDown(decimalValues[i])); - } - - @Benchmark - public void nextDown(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.nextDown(decimalValues[i])); - } - - @Benchmark - public void nopJustIter(Blackhole bh) { - for (int i = 1; i < 1000; ++i) - bh.consume(decimalValues[i]); - } - - @Benchmark - public void addMulTwoCalls(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.add(decimalValues[i], Decimal64Utils.multiply(decimalValues[i + 1], decimalValues[i + 2]))); - } - - @Benchmark - public void addMulOneCall(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.multiplyAndAdd(decimalValues[i + 1], decimalValues[i + 2], decimalValues[i])); - } - - @Benchmark - public void basketPnlOld(Blackhole bh) { - for (int i = 0; 
i < 1000; ++i) - bh.consume(Decimal64Utils.subtract(decimalValues[i], Decimal64Utils.multiply(decimalValues[i + 1], decimalValues[i + 2]))); - } - - @Benchmark - public void basketPnlNew(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.negate(Decimal64Utils.multiplyAndAdd(decimalValues[i + 1], decimalValues[i + 2], Decimal64Utils.negate(decimalValues[i])))); - } - - @Benchmark - public void fdimOld(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.max(Decimal64Utils.ZERO, Decimal64Utils.add(decimalValues[i], decimalValues[i + 1]))); + bh.consume(__PH + (__PM >>> 32)); + bh.consume((__PM << 32) + (LONG_LOW_PART & __PL)); + } } -// @Benchmark -// public void fdimNew(Blackhole bh) { -// for (int i = 0; i < 1000; ++i) -// bh.consume(NativeImpl.bid64Fdim(decimalValues[i], Decimal64Utils.negate(decimalValues[i + 1]))); -// } - @Benchmark - public void fromDecimalDouble(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(Decimal64Utils.fromDecimalDouble(doubleValues[i])); + public void mulJdk9FromDfp(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + final long A = decimalValues[i]; + final long T = decimalValues[i + 1]; + bh.consume(Mul64Impl.mulHigh(A, T)); + bh.consume(A * T); + } } @Benchmark - public void fromDecimalDoubleOrig(Blackhole bh) { - for (int i = 0; i < 1000; ++i) - bh.consume(fromDecimalDoubleOrig(doubleValues[i])); - } - - public static long fromDecimalDoubleOrig(final double x) { - final long y = Decimal64Utils.fromDouble(x); - long m, signAndExp; - - // Odd + special encoding(16 digits) - final long notY = ~y; - if ((MASK_SPECIAL & notY) == 0) { - if ((MASK_INFINITY_AND_NAN & notY) == 0) - return y; - - m = (y & LARGE_COEFFICIENT_MASK) + LARGE_COEFFICIENT_HIGH_BIT; - signAndExp = ((y << 2) & EXPONENT_MASK_SMALL) + (y & MASK_SIGN); - } else { - m = y & SMALL_COEFFICIENT_MASK; - // 16 digits + odd - signAndExp = y & (-1L << EXPONENT_SHIFT_SMALL); - if (m <= MAX_COEFFICIENT / 10 + 1) - 
return y; - } - - if ((y & 1) == 0) - return y; - // NeedAdjustment - // Check the last digit - final long m1 = m + 1; - m = m1 / 10; - if (m1 - m * 10 > 2) - return y; - - signAndExp += 1L << EXPONENT_SHIFT_SMALL; - if (Decimal64Utils.toDouble(signAndExp + m) != x) - return y; - - for (long n = m; ; ) { - final long m10 = n / 10; - if (m10 * 10 != n) - return signAndExp + n; - n = m10; - signAndExp += 1L << EXPONENT_SHIFT_SMALL; + public void mulJdk9XXX(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + final long A = decimalValues[i]; + final long T = decimalValues[i + 1]; + bh.consume(Math.multiplyHigh(A, T)); + bh.consume(A * T); } } From b58c6b4a43383917edd1f71291370be022b0e5fa Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Mon, 26 Jun 2023 22:48:56 +0300 Subject: [PATCH 10/29] Java: Update to minimal version 8. --- java/systemInfo/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/systemInfo/build.gradle b/java/systemInfo/build.gradle index 71351d4c..d9f044cc 100644 --- a/java/systemInfo/build.gradle +++ b/java/systemInfo/build.gradle @@ -6,7 +6,7 @@ repositories { mavenCentral() } -sourceCompatibility = 1.7 +sourceCompatibility = 8 jar { manifest { From 0040c046257e168142e74c0681ffed4e5eb5fd4f Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Tue, 27 Jun 2023 00:04:07 +0300 Subject: [PATCH 11/29] Java: Add multiplication benchmark. 
--- .../com/epam/deltix/dfp/MathBenchmark.java | 290 ++++++++++++++++-- .../com/epam/deltix/dfp/MulBenchmark.java | 87 ++++++ 2 files changed, 345 insertions(+), 32 deletions(-) create mode 100644 java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java index dfd4e067..6cb857aa 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java @@ -11,7 +11,6 @@ import static com.epam.deltix.dfp.JavaImpl.*; import static com.epam.deltix.dfp.JavaImpl.EXPONENT_SHIFT_SMALL; -import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @@ -35,45 +34,272 @@ public void setUp() { } @Benchmark - public void mul0(Blackhole bh) { - for (int i = 0; i < 1000; ++i) { - long __CX = decimalValues[i]; - long __CY = decimalValues[i + 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; + public void fmaNative(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.multiplyAndAdd(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2])); + } - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); + @Benchmark + public void fma(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.multiplyAndAdd(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2])); + } - bh.consume(__PH + (__PM >>> 32)); - bh.consume((__PM << 32) + (LONG_LOW_PART & __PL)); - } + @Benchmark + public void multiplyByInt32Native(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + 
bh.consume(NativeImpl.multiplyByInt32(decimalValues[i], (int) decimalValues[i + 1])); } @Benchmark - public void mulJdk9FromDfp(Blackhole bh) { - for (int i = 0; i < 1000; ++i) { - final long A = decimalValues[i]; - final long T = decimalValues[i + 1]; - bh.consume(Mul64Impl.mulHigh(A, T)); - bh.consume(A * T); - } + public void multiplyByInt32(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.multiplyByInteger(decimalValues[i], (int) decimalValues[i + 1])); + } + + @Benchmark + public void multiplyByInt64Native(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.multiplyByInt64(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void multiplyByInt64(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.multiplyByInteger(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void divideByInt32Native(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.divideByInt32(decimalValues[i], (int) decimalValues[i + 1])); + } + + @Benchmark + public void divideByInt32(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.divideByInteger(decimalValues[i], (int) decimalValues[i + 1])); + } + + @Benchmark + public void divideByInt64Native(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.divideByInt64(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void divideByInt64(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.divideByInteger(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void mean2Native(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.mean2(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void mean2(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.mean(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void minNative(Blackhole bh) { + for 
(int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.min2(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void min(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.min(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void maxNative(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.max2(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void max(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.max(decimalValues[i], decimalValues[i + 1])); } @Benchmark - public void mulJdk9XXX(Blackhole bh) { - for (int i = 0; i < 1000; ++i) { - final long A = decimalValues[i]; - final long T = decimalValues[i + 1]; - bh.consume(Math.multiplyHigh(A, T)); - bh.consume(A * T); + public void max4Native(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.max4(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2], decimalValues[i + 3])); + } + + @Benchmark + public void max4(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.max(decimalValues[i], decimalValues[i + 1], decimalValues[i + 2], decimalValues[i + 3])); + } + + @Benchmark + public void divNative(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.divide(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void div(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.divide(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void div2(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(JavaImplDiv.div2(decimalValues[i])); + } + + @Benchmark + public void mulNative(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.multiply2(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void mul(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.multiply(decimalValues[i], decimalValues[i + 1])); + } 
+ + @Benchmark + public void addNative(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.add2(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void add(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.add(decimalValues[i], decimalValues[i + 1])); + } + + @Benchmark + public void nextUpNative(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.nextUp(decimalValues[i])); + } + + @Benchmark + public void nextUp(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.nextUp(decimalValues[i])); + } + + @Benchmark + public void nextDownNative(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(NativeImpl.nextDown(decimalValues[i])); + } + + @Benchmark + public void nextDown(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.nextDown(decimalValues[i])); + } + + @Benchmark + public void nopJustIter(Blackhole bh) { + for (int i = 1; i < 1000; ++i) + bh.consume(decimalValues[i]); + } + + @Benchmark + public void addMulTwoCalls(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.add(decimalValues[i], Decimal64Utils.multiply(decimalValues[i + 1], decimalValues[i + 2]))); + } + + @Benchmark + public void addMulOneCall(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.multiplyAndAdd(decimalValues[i + 1], decimalValues[i + 2], decimalValues[i])); + } + + @Benchmark + public void basketPnlOld(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.subtract(decimalValues[i], Decimal64Utils.multiply(decimalValues[i + 1], decimalValues[i + 2]))); + } + + @Benchmark + public void basketPnlNew(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.negate(Decimal64Utils.multiplyAndAdd(decimalValues[i + 1], decimalValues[i + 2], Decimal64Utils.negate(decimalValues[i])))); + } + + @Benchmark + public void fdimOld(Blackhole bh) { + for (int i = 0; i 
< 1000; ++i) + bh.consume(Decimal64Utils.max(Decimal64Utils.ZERO, Decimal64Utils.add(decimalValues[i], decimalValues[i + 1]))); + } + +// @Benchmark +// public void fdimNew(Blackhole bh) { +// for (int i = 0; i < 1000; ++i) +// bh.consume(NativeImpl.bid64Fdim(decimalValues[i], Decimal64Utils.negate(decimalValues[i + 1]))); +// } + + @Benchmark + public void fromDecimalDouble(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.fromDecimalDouble(doubleValues[i])); + } + + @Benchmark + public void fromDecimalDoubleOrig(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(fromDecimalDoubleOrig(doubleValues[i])); + } + + public static long fromDecimalDoubleOrig(final double x) { + final long y = Decimal64Utils.fromDouble(x); + long m, signAndExp; + + // Odd + special encoding(16 digits) + final long notY = ~y; + if ((MASK_SPECIAL & notY) == 0) { + if ((MASK_INFINITY_AND_NAN & notY) == 0) + return y; + + m = (y & LARGE_COEFFICIENT_MASK) + LARGE_COEFFICIENT_HIGH_BIT; + signAndExp = ((y << 2) & EXPONENT_MASK_SMALL) + (y & MASK_SIGN); + } else { + m = y & SMALL_COEFFICIENT_MASK; + // 16 digits + odd + signAndExp = y & (-1L << EXPONENT_SHIFT_SMALL); + if (m <= MAX_COEFFICIENT / 10 + 1) + return y; + } + + if ((y & 1) == 0) + return y; + // NeedAdjustment + // Check the last digit + final long m1 = m + 1; + m = m1 / 10; + if (m1 - m * 10 > 2) + return y; + + signAndExp += 1L << EXPONENT_SHIFT_SMALL; + if (Decimal64Utils.toDouble(signAndExp + m) != x) + return y; + + for (long n = m; ; ) { + final long m10 = n / 10; + if (m10 * 10 != n) + return signAndExp + n; + n = m10; + signAndExp += 1L << EXPONENT_SHIFT_SMALL; } } diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java new file mode 100644 index 00000000..647c5641 --- /dev/null +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java @@ -0,0 +1,87 @@ +package 
com.epam.deltix.dfp; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.concurrent.TimeUnit; + +import static com.epam.deltix.dfp.JavaImpl.*; +import static com.epam.deltix.dfp.JavaImpl.EXPONENT_SHIFT_SMALL; +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(time = 3, iterations = 1) +@Measurement(time = 3, iterations = 3) +@State(Scope.Thread) +public class MulBenchmark { + private long[] decimalValues; + private double[] doubleValues; + public static int fixedSeed = 42 * 42 * 42 * 42 * 42; + + @Setup + public void setUp() { + TestUtils.RandomDecimalsGenerator generator = new TestUtils.RandomDecimalsGenerator(fixedSeed); + decimalValues = new long[1004]; + doubleValues = new double[decimalValues.length]; + for (int i = 0; i < decimalValues.length; ++i) { + decimalValues[i] = generator.nextX(); + doubleValues[i] = Decimal64Utils.toDouble(decimalValues[i]); + } + } + + @Benchmark + public void mul0(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + long __CX = decimalValues[i]; + long __CY = decimalValues[i + 1]; + long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; + __CXH = __CX >>> 32; + __CXL = LONG_LOW_PART & __CX; + __CYH = __CY >>> 32; + __CYL = LONG_LOW_PART & __CY; + + __PM = __CXH * __CYL; + __PH = __CXH * __CYH; + __PL = __CXL * __CYL; + __PM2 = __CXL * __CYH; + __PH += (__PM >>> 32); + __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); + + bh.consume(__PH + (__PM >>> 32)); + bh.consume((__PM << 32) + (LONG_LOW_PART & __PL)); + } + } + + @Benchmark + public void multiplyHigh(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + final long A = decimalValues[i]; + final long T = decimalValues[i + 1]; + 
bh.consume(Mul64Impl.multiplyHigh(A, T)); + bh.consume(A * T); + } + } + + @Benchmark + public void unsignedMultiplyHigh(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + final long A = decimalValues[i]; + final long T = decimalValues[i + 1]; + bh.consume(Mul64Impl.unsignedMultiplyHigh(A, T)); + bh.consume(A * T); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(".*" + MathBenchmark.class.getSimpleName() + ".*") + .forks(1) + .build(); + new Runner(opt).run(); + } +} From 799c2a1f29ee8651469ad182dc373019ec751d85 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Tue, 27 Jun 2023 00:08:39 +0300 Subject: [PATCH 12/29] Java: Fix sourceCompatibility --- java/dfp-math/build.gradle | 6 +----- java/dfpNativeTests/build.gradle | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/java/dfp-math/build.gradle b/java/dfp-math/build.gradle index ef9ac990..27977397 100644 --- a/java/dfp-math/build.gradle +++ b/java/dfp-math/build.gradle @@ -9,11 +9,7 @@ if (isReleaseVersion) { group = 'com.epam.deltix' -sourceCompatibility = 1.7 -compileTestJava { - sourceCompatibility = "1.8" - targetCompatibility = "1.8" -} +sourceCompatibility = 8 repositories { mavenCentral() diff --git a/java/dfpNativeTests/build.gradle b/java/dfpNativeTests/build.gradle index 510746a2..ae8ee39a 100644 --- a/java/dfpNativeTests/build.gradle +++ b/java/dfpNativeTests/build.gradle @@ -7,7 +7,7 @@ apply plugin: 'me.champeau.gradle.jmh' group = 'com.epam.deltix' -sourceCompatibility = 1.8 +sourceCompatibility = 8 repositories { mavenCentral() From 31034aa0d871d2b6620eda2bcab7f0c875d221b5 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Tue, 27 Jun 2023 00:32:20 +0300 Subject: [PATCH 13/29] Java: Fix JMH --- java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java | 2 +- .../dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java | 2 +- java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java | 2 +- 
java/dfpNativeTests/build.gradle | 7 +------ .../src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java | 2 +- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java index 1d0f8346..9f01db5e 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java @@ -2,7 +2,7 @@ import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; -public class Mul64Impl { +class Mul64Impl { public static long multiplyHigh(final long __CX, final long __CY) { //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); { diff --git a/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java index 8f9c6d52..5f9bbd67 100644 --- a/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java +++ b/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java @@ -1,6 +1,6 @@ package com.epam.deltix.dfp; -public class Mul64Impl { +class Mul64Impl { public static long multiplyHigh(final long A, final long T) { return Math.multiplyHigh(A, T); } diff --git a/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java index 94e8c68b..ec347837 100644 --- a/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java +++ b/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java @@ -2,7 +2,7 @@ import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; -public class Mul64Impl { +class Mul64Impl { public static long multiplyHigh(final long A, final long T) { return Math.multiplyHigh(A, T); } diff --git a/java/dfpNativeTests/build.gradle b/java/dfpNativeTests/build.gradle index ae8ee39a..a586caf9 100644 --- a/java/dfpNativeTests/build.gradle +++ b/java/dfpNativeTests/build.gradle @@ -1,9 +1,8 @@ plugins { - id "me.champeau.gradle.jmh" version "0.5.3" apply false + id "me.champeau.jmh" version "0.7.1" } 
apply plugin: 'java' -apply plugin: 'me.champeau.gradle.jmh' group = 'com.epam.deltix' @@ -41,7 +40,3 @@ task copyNativeDfpResources(type: Copy) { } compileJava.dependsOn(copyNativeDfpResources) -jmh { - include = '.*Benchmark.*' -} - diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java index 647c5641..4f354ad0 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java @@ -79,7 +79,7 @@ public void unsignedMultiplyHigh(Blackhole bh) { public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() - .include(".*" + MathBenchmark.class.getSimpleName() + ".*") + .include(".*" + MulBenchmark.class.getSimpleName() + ".*") .forks(1) .build(); new Runner(opt).run(); From b59571ae01b7768d2f8d0bbb548712cad229278e Mon Sep 17 00:00:00 2001 From: agdavydov81 Date: Tue, 27 Jun 2023 17:35:43 +0300 Subject: [PATCH 14/29] CI: Update Java version --- .github/workflows/Build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Build.yml b/.github/workflows/Build.yml index 0fdc6cb6..f18c394a 100644 --- a/.github/workflows/Build.yml +++ b/.github/workflows/Build.yml @@ -146,7 +146,7 @@ jobs: uses: actions/setup-java@v3 with: distribution: 'adopt' - java-version: '8' + java-version: '19' - name: build run: | ./gradlew jar @@ -195,7 +195,7 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-latest', 'windows-2019', 'macos-latest'] - java: [ '8', '11', '15'] + java: [ '8', '11', '19'] steps: - uses: actions/checkout@v3 with: From 1dbbe96fec84c2c40df1cae5b0d5824ede9d8497 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 00:49:52 +0300 Subject: [PATCH 15/29] Java: Make task for the tests compilation to jars. 
[skip ci] --- java/dfp-math/build.gradle | 15 +++++++++++++++ java/dfp/build.gradle | 14 ++++++++++++++ java/dfpNativeTests/build.gradle | 14 ++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/java/dfp-math/build.gradle b/java/dfp-math/build.gradle index 27977397..f4165944 100644 --- a/java/dfp-math/build.gradle +++ b/java/dfp-math/build.gradle @@ -130,3 +130,18 @@ jar { sourcesJar { duplicatesStrategy = DuplicatesStrategy.EXCLUDE } + +task testsJar(type: Jar, dependsOn: [jar, testClasses, processTestResources]) { + archiveClassifier = 'tests' + from sourceSets.test.output +} + +task copyTestDeps(type: Copy) { + from(sourceSets.test.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(testsJar.outputs.files) + into('testLibs') +} diff --git a/java/dfp/build.gradle b/java/dfp/build.gradle index c217825f..c6b10c2a 100644 --- a/java/dfp/build.gradle +++ b/java/dfp/build.gradle @@ -143,3 +143,17 @@ jar { } } +task testsJar(type: Jar, dependsOn: [jar, testClasses, processTestResources]) { + archiveClassifier = 'tests' + from sourceSets.test.output +} + +task copyTestDeps(type: Copy) { + from(sourceSets.test.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(testsJar.outputs.files) + into('testLibs') +} diff --git a/java/dfpNativeTests/build.gradle b/java/dfpNativeTests/build.gradle index a586caf9..f1090332 100644 --- a/java/dfpNativeTests/build.gradle +++ b/java/dfpNativeTests/build.gradle @@ -40,3 +40,17 @@ task copyNativeDfpResources(type: Copy) { } compileJava.dependsOn(copyNativeDfpResources) +task testsJar(type: Jar, dependsOn: [jar, testClasses, processTestResources]) { + archiveClassifier = 'tests' + from sourceSets.test.output +} + +task copyTestDeps(type: Copy) { + from(sourceSets.test.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, 
dependsOn: [testsJar, copyTestDeps]) { + from(testsJar.outputs.files) + into('testLibs') +} From 47d15cd824a6d5cf2abcf6daf3d36d58371c7e6b Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 12:21:58 +0300 Subject: [PATCH 16/29] Java: Initial version of the tests run from JARs [skip ci] --- java/dfp-math/build.gradle | 11 +++++++++++ java/dfp/build.gradle | 11 +++++++++++ java/dfpNativeTests/build.gradle | 11 +++++++++++ java/vtaTest/build.gradle | 24 ++++++++++++++++++++++++ 4 files changed, 57 insertions(+) diff --git a/java/dfp-math/build.gradle b/java/dfp-math/build.gradle index f4165944..251fbb1d 100644 --- a/java/dfp-math/build.gradle +++ b/java/dfp-math/build.gradle @@ -145,3 +145,14 @@ task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { from(testsJar.outputs.files) into('testLibs') } + +task runTestJars(type: JavaExec) { + mainClass = 'org.junit.runner.JUnitCore' + classpath = files { file('testLibs').listFiles() } + + def testClassesRoot = file('src/test/java').absolutePath + fileTree(dir: testClassesRoot, include: '**/*.java').each { File file -> + def ap = file.absolutePath + args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') + } +} diff --git a/java/dfp/build.gradle b/java/dfp/build.gradle index c6b10c2a..6e5ac853 100644 --- a/java/dfp/build.gradle +++ b/java/dfp/build.gradle @@ -157,3 +157,14 @@ task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { from(testsJar.outputs.files) into('testLibs') } + +task runTestJars(type: JavaExec) { + mainClass = 'org.junit.runner.JUnitCore' + classpath = files { file('testLibs').listFiles() } + + def testClassesRoot = file('src/test/java').absolutePath + fileTree(dir: testClassesRoot, include: '**/*.java').each { File file -> + def ap = file.absolutePath + args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') + } +} diff --git a/java/dfpNativeTests/build.gradle 
b/java/dfpNativeTests/build.gradle index f1090332..9cd0610f 100644 --- a/java/dfpNativeTests/build.gradle +++ b/java/dfpNativeTests/build.gradle @@ -54,3 +54,14 @@ task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { from(testsJar.outputs.files) into('testLibs') } + +task runTestJars(type: JavaExec) { + mainClass = 'org.junit.runner.JUnitCore' + classpath = files { file('testLibs').listFiles() } + + def testClassesRoot = file('src/test/java').absolutePath + fileTree(dir: testClassesRoot, include: '**/*.java').each { File file -> + def ap = file.absolutePath + args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') + } +} diff --git a/java/vtaTest/build.gradle b/java/vtaTest/build.gradle index 0f5f8452..b1d55f4a 100644 --- a/java/vtaTest/build.gradle +++ b/java/vtaTest/build.gradle @@ -42,3 +42,27 @@ task testVta(dependsOn: compileJava, type: JavaExec) { jvmArgs += '-javaagent:' + vta_path + '=' + vta_config } test.dependsOn testVta + +task copyTestDeps(type: Copy) { + from(sourceSets.main.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, dependsOn: [jar, copyTestDeps]) { + from(jar.outputs.files) + into('testLibs') +} + +task runTestJars(type: JavaExec) { + mainClass = "com.example.demo.DemoApplication" + classpath = files { file('testLibs').listFiles() } + workingDir = "$projectDir" + + String vta_path = findDependency(".*value-types.*jar") + print "VTA path: " + vta_path + "\n" + + String vta_config = "$projectDir/value-types.json" + print "VTA config: " + vta_config + "\n" + + jvmArgs += '-javaagent:' + vta_path + '=' + vta_config +} From 4cdba97aeddc5fd9041eb26cc0dca28b2334436e Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 12:46:29 +0300 Subject: [PATCH 17/29] Java: Fix runTestJars task [skip ci] --- java/dfp-math/build.gradle | 3 ++- java/dfp/build.gradle | 3 ++- java/dfpNativeTests/build.gradle | 3 ++- 3 files changed, 6 insertions(+), 3 
deletions(-) diff --git a/java/dfp-math/build.gradle b/java/dfp-math/build.gradle index 251fbb1d..f78c0dbf 100644 --- a/java/dfp-math/build.gradle +++ b/java/dfp-math/build.gradle @@ -142,6 +142,7 @@ task copyTestDeps(type: Copy) { } task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(jar.outputs.files) from(testsJar.outputs.files) into('testLibs') } @@ -151,7 +152,7 @@ task runTestJars(type: JavaExec) { classpath = files { file('testLibs').listFiles() } def testClassesRoot = file('src/test/java').absolutePath - fileTree(dir: testClassesRoot, include: '**/*.java').each { File file -> + fileTree(dir: testClassesRoot, include: '**/*Test.java').each { File file -> def ap = file.absolutePath args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') } diff --git a/java/dfp/build.gradle b/java/dfp/build.gradle index 6e5ac853..1934fb07 100644 --- a/java/dfp/build.gradle +++ b/java/dfp/build.gradle @@ -154,6 +154,7 @@ task copyTestDeps(type: Copy) { } task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(jar.outputs.files) from(testsJar.outputs.files) into('testLibs') } @@ -163,7 +164,7 @@ task runTestJars(type: JavaExec) { classpath = files { file('testLibs').listFiles() } def testClassesRoot = file('src/test/java').absolutePath - fileTree(dir: testClassesRoot, include: '**/*.java').each { File file -> + fileTree(dir: testClassesRoot, include: '**/*Test.java').each { File file -> def ap = file.absolutePath args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') } diff --git a/java/dfpNativeTests/build.gradle b/java/dfpNativeTests/build.gradle index 9cd0610f..c7d3021c 100644 --- a/java/dfpNativeTests/build.gradle +++ b/java/dfpNativeTests/build.gradle @@ -51,6 +51,7 @@ task copyTestDeps(type: Copy) { } task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(jar.outputs.files) from(testsJar.outputs.files) into('testLibs') } @@ 
-60,7 +61,7 @@ task runTestJars(type: JavaExec) { classpath = files { file('testLibs').listFiles() } def testClassesRoot = file('src/test/java').absolutePath - fileTree(dir: testClassesRoot, include: '**/*.java').each { File file -> + fileTree(dir: testClassesRoot, include: '**/*Test.java').each { File file -> def ap = file.absolutePath args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') } From 7fd89cde5d3217cca1d2baac8c640be335b93edb Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 13:31:43 +0300 Subject: [PATCH 18/29] Java: Downgrade to gradle 7.6.1 for JDK 1.7 support [skip ci] --- gradle/wrapper/gradle-wrapper.properties | 2 +- java/dfp/build.gradle | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index fae08049..774fae87 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.1-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/java/dfp/build.gradle b/java/dfp/build.gradle index 1934fb07..68988dc8 100644 --- a/java/dfp/build.gradle +++ b/java/dfp/build.gradle @@ -24,6 +24,10 @@ sourceSets { } compileJava { + sourceCompatibility = 1.7 + targetCompatibility = 1.7 +} +compileTestJava { sourceCompatibility = 8 targetCompatibility = 8 } From 5bacf6d557c2927454fd3f625d0601b1d2964174 Mon Sep 17 00:00:00 2001 From: agdavydov81 Date: Wed, 28 Jun 2023 13:44:16 +0300 Subject: [PATCH 19/29] CI: Java: Split compilation and run tests for different JDK versions --- .github/workflows/Build.yml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.github/workflows/Build.yml 
b/.github/workflows/Build.yml index f18c394a..02a6de5d 100644 --- a/.github/workflows/Build.yml +++ b/.github/workflows/Build.yml @@ -151,13 +151,17 @@ jobs: run: | ./gradlew jar ./gradlew javadocJar + ./gradlew copyTestJars - uses: actions/upload-artifact@v3 with: name: java-jars path: | ./*java/dfp/build/libs/* ./*java/dfp-math/build/libs/* - ./*java/decimalDemo/build/libs/* + ./*java/dfp/testLibs/* + ./*java/dfp-math/testLibs/* + ./*java/dfpNativeTests/testLibs/* + ./*java/vtaTest/testLibs/* retention-days: 7 build-dotnet: @@ -200,10 +204,10 @@ jobs: - uses: actions/checkout@v3 with: submodules: 'recursive' - - name: Download compress-native artifacts + - name: Download build-java artifacts uses: actions/download-artifact@v3 with: - name: compress-native + name: build-java - name: Setup java uses: actions/setup-java@v3 with: @@ -214,15 +218,7 @@ jobs: ./gradlew :java:systemInfo:runSystemInfo - name: test run: | - ./gradlew check - - name: Upload test results on failure - uses: actions/upload-artifact@v3 - if: ${{ failure() }} - with: - path: | - java/dfp/build/reports/* - java/dfp-math/build/reports/* - name: Java-{{ matrix.java }}-${{ matrix.os }}-TestReports + ./gradlew runTestJars test-dotnet: runs-on: ${{ matrix.os }} From bd0f6df20ee1e408eed6d791dc2b520c759b0ff8 Mon Sep 17 00:00:00 2001 From: agdavydov81 Date: Wed, 28 Jun 2023 14:03:01 +0300 Subject: [PATCH 20/29] CI: Fix Java artifact name --- .github/workflows/Build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Build.yml b/.github/workflows/Build.yml index 02a6de5d..31a53a35 100644 --- a/.github/workflows/Build.yml +++ b/.github/workflows/Build.yml @@ -204,10 +204,10 @@ jobs: - uses: actions/checkout@v3 with: submodules: 'recursive' - - name: Download build-java artifacts + - name: Download java-jars artifacts uses: actions/download-artifact@v3 with: - name: build-java + name: java-jars - name: Setup java uses: actions/setup-java@v3 with: From 
47382bca50f265b14a61749b8897efb8abcddb73 Mon Sep 17 00:00:00 2001 From: agdavydov81 Date: Wed, 28 Jun 2023 14:19:57 +0300 Subject: [PATCH 21/29] CI: Fix dependency --- .github/workflows/Build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Build.yml b/.github/workflows/Build.yml index 31a53a35..c5f881ed 100644 --- a/.github/workflows/Build.yml +++ b/.github/workflows/Build.yml @@ -194,7 +194,7 @@ jobs: test-java: runs-on: ${{ matrix.os }} - needs: [compress-native] + needs: [build-java] strategy: fail-fast: false matrix: From d34d705d540727083b6f5ead715d02ceefbce6e1 Mon Sep 17 00:00:00 2001 From: agdavydov81 Date: Wed, 28 Jun 2023 14:54:29 +0300 Subject: [PATCH 22/29] CI: Java: Update Release tests [skip ci] --- .github/workflows/CI.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 594cb829..fd7c3672 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -190,18 +190,22 @@ jobs: uses: actions/setup-java@v3 with: distribution: 'adopt' - java-version: '8' + java-version: '19' - name: build run: | ./gradlew jar ./gradlew javadocJar + ./gradlew copyTestJars - uses: actions/upload-artifact@v3 with: name: java-jars path: | ./*java/dfp/build/libs/* ./*java/dfp-math/build/libs/* - ./*java/decimalDemo/build/libs/* + ./*java/dfp/testLibs/* + ./*java/dfp-math/testLibs/* + ./*java/dfpNativeTests/testLibs/* + ./*java/vtaTest/testLibs/* retention-days: 7 @@ -236,15 +240,15 @@ jobs: test-java: runs-on: ubuntu-latest - needs: [compress-native] + needs: [build-java] steps: - uses: actions/checkout@v3 with: submodules: 'recursive' - - name: Download compress-native artifacts + - name: Download java-jars artifacts uses: actions/download-artifact@v3 with: - name: compress-native + name: java-jars - name: Setup java uses: actions/setup-java@v3 with: @@ -255,7 +259,7 @@ jobs: ./gradlew :java:systemInfo:runSystemInfo - name: test run: | 
- ./gradlew check + ./gradlew runTestJars test-dotnet: From 4d09aa750e1beccc7b1fe78f8ef26159da74520e Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 17:11:51 +0300 Subject: [PATCH 23/29] Java: Replace direct __mul_64x64_to_128 substitutions with Mul64Impl.unsignedMultiplyHigh --- .../java/com/epam/deltix/dfp/JavaImplAdd.java | 105 +------ .../epam/deltix/dfp/JavaImplCastBinary64.java | 21 +- .../java/com/epam/deltix/dfp/JavaImplCmp.java | 231 ++------------- .../java/com/epam/deltix/dfp/JavaImplDiv.java | 126 +------- .../com/epam/deltix/dfp/JavaImplMinMax.java | 84 +----- .../java/com/epam/deltix/dfp/JavaImplMul.java | 273 ++---------------- 6 files changed, 80 insertions(+), 760 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java index f01afddd..16b270da 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java @@ -386,25 +386,8 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_a, bid_reciprocals10_64[extra_digits]); - { - long __CX = coefficient_a; - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_a, bid_reciprocals10_64[extra_digits]); + CT_w0 = coefficient_a * bid_reciprocals10_64[extra_digits]; // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = 
bid_short_recip_scale[extra_digits]; @@ -442,25 +425,8 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); - { - final long __CX = coefficient_b; - final long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, bid_reciprocals10_64[extra_digits]); + CT_w0 = coefficient_b * bid_reciprocals10_64[extra_digits]; // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; @@ -478,25 +444,8 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // must divide coeff_a by 10 saved_ca = saved_ca + T1; //__mul_64x64_to_128(CA, saved_ca, 0x3333333333333334L); - { - final long __CX = saved_ca; - final long __CY = 0x3333333333333334L; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = Mul64Impl.unsignedMultiplyHigh(saved_ca, 0x3333333333333334L); + CA_w0 = saved_ca * 0x3333333333333334L; //reciprocals10_64[1]); coefficient_a = CA_w1 >>> 1; rem_a = @@ -512,25 +461,8 @@ else if 
((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); - { - final long __CX = coefficient_b; - final long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, bid_reciprocals10_64[extra_digits]); + CT_w0 = coefficient_b * bid_reciprocals10_64[extra_digits]; // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; C0_64 = CT_w1 >>> amount; @@ -547,25 +479,8 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT_new, coefficient_b, bid_reciprocals10_64[extra_digits]); - { - final long __CX = coefficient_b; - final long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_new_w1 = __PH + (__PM >>> 32); - CT_new_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_new_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, bid_reciprocals10_64[extra_digits]); + CT_new_w0 = coefficient_b * bid_reciprocals10_64[extra_digits]; // now get P/10^extra_digits: shift C64 right by 
M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; C0_64 = CT_new_w1 >>> amount; diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java index 9ede86b7..5319855f 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java @@ -146,25 +146,8 @@ public static long binary64_to_bid64(double x, final int rnd_mode/*, final JavaI long /*BID_UINT64*/ QM64; //__mul_64x64_to_128(ALBL, cc_w0, pow5_w0); - { - long __CX = cc_w0; - long __CY = pow5_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - ALBL_w1 = __PH + (__PM >>> 32); - ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(cc_w0, pow5_w0); + ALBL_w0 = cc_w0 * pow5_w0; QM64 = pow5_w0 * cc_w1 + cc_w0 * pow5_w1; diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java index 1590af7d..50c56eb3 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java @@ -133,25 +133,8 @@ public static int compare(final long /*BID_UINT64*/ x, final long /*BID_UINT64*/ if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, 
__CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); + sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; // if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -164,25 +147,8 @@ public static int compare(final long /*BID_UINT64*/ x, final long /*BID_UINT64*/ } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_y; - final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); + sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; // if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -394,25 +360,8 @@ public static boolean bid64_quiet_greater(final long /*BID_UINT64*/ x, final lon if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - 
exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); + sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; // if postitive, return whichever significand is larger (converse if neg.) if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -423,25 +372,8 @@ public static boolean bid64_quiet_greater(final long /*BID_UINT64*/ x, final lon } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_y; - final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); + sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; // if positive, return whichever significand is larger // (converse if negative) @@ -557,25 +489,8 @@ 
public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); + sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; // return 1 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -588,25 +503,8 @@ public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_y; - final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = 
Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); + sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; // return 0 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -725,25 +623,8 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); + sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; // return 0 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -756,25 +637,8 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_y; - final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL 
* __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); + sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; // return 0 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -892,25 +756,8 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); + sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; // return 1 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -923,25 +770,8 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_y; - final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, 
__PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); + sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; // return 1 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -1096,25 +926,8 @@ public static boolean bid64_isNormal(final long /*BID_UINT64*/ x) { // if (exp_x - 398 = -383) the number may be subnormal if (exp_x < 15) { // __mul_64x64_to_128MACH (sig_x_prime, sig_x, bid_mult_factor[exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_x_prime_w1 = __PH + (__PM >>> 32); - sig_x_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_x_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x]); + sig_x_prime_w0 = sig_x * bid_mult_factor[exp_x]; // normal return sig_x_prime_w1 != 0 || UnsignedLong.isGreater(sig_x_prime_w0, 999999999999999L); diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java index e668183a..5a205292 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java +++ 
b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java @@ -231,25 +231,8 @@ else if (exponent_x < 0) T = bid_power10_table_128_w0[ed1]; //__mul_64x64_to_128 (CA, A, T); - { - long __CX = A; - long __CY = T; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = Mul64Impl.unsignedMultiplyHigh(A, T); + CA_w0 = A * T; Q = 0; diff_expon = diff_expon - ed2; @@ -298,25 +281,8 @@ else if (exponent_x < 0) T = bid_power10_table_128_w0[ed2]; //__mul_64x64_to_128 (CA, R, T); - { - long __CX = R; - long __CY = T; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = Mul64Impl.unsignedMultiplyHigh(R, T); + CA_w0 = R * T; B = coefficient_y; @@ -399,25 +365,8 @@ else if (exponent_x < 0) nzeros = d5; //__mul_64x64_to_128 (CT, Q, bid_reciprocals10_64[nzeros]); - { - long __CX = Q; - long __CY = bid_reciprocals10_64[nzeros]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - 
CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_w1 = Mul64Impl.unsignedMultiplyHigh(Q, bid_reciprocals10_64[nzeros]); + CT_w0 = Q * bid_reciprocals10_64[nzeros]; // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[nzeros]; @@ -465,25 +414,8 @@ else if (exponent_x < 0) if (nzeros != 0) { //__mul_64x64_to_128 (CT, Q, bid_reciprocals10_64[nzeros]); - { - long __CX = Q; - long __CY = bid_reciprocals10_64[nzeros]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_w1 = Mul64Impl.unsignedMultiplyHigh(Q, bid_reciprocals10_64[nzeros]); + CT_w0 = Q * bid_reciprocals10_64[nzeros]; // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[nzeros]; @@ -553,45 +485,11 @@ public static long get_BID64_UF(final long sgn, final int expon, long coeff, fin long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - final long __CX = _A; - final long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w1); + _ALBH_w0 = _A * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - final long __CX = _A; - final 
long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w0); + _ALBL_w0 = _A * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java index 9154bece..69195e1e 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java @@ -142,25 +142,8 @@ public static long bid64_min_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); + sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; // if postitive, return whichever significand is larger @@ -174,25 +157,8 @@ public 
static long bid64_min_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_y; - final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); + sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; // if postitive, return whichever significand is larger (converse if negative) if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -336,25 +302,8 @@ public static long bid64_max_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_x; - final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + 
sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); + sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; // if postitive, return whichever significand is larger // (converse if negative) @@ -366,25 +315,8 @@ public static long bid64_max_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - { - final long __CX = sig_y; - final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); + sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; // if postitive, return whichever significand is larger (converse if negative) if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java index 6d4c710a..c52797af 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java @@ -213,25 +213,8 @@ else if (exponent_x < 0) } else { // get 128-bit product: coefficient_x*coefficient_y //__mul_64x64_to_128(P, coefficient_x, coefficient_y); - { - long __CX = coefficient_x; - long __CY = coefficient_y; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = 
__CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P_w1 = __PH + (__PM >>> 32); - P_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + P_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, coefficient_y); + P_w0 = coefficient_x * coefficient_y; // tighten binary range of P: leading bit is 2^bp // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1 @@ -291,88 +274,20 @@ else if (exponent_x < 0) long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CX = _A_w0; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CX = _B_w0; - long __CY = _A_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CX = _A_w0; - long __CY = _B_w0; - long __CXH, 
__CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CX = _A_w1; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -486,88 +401,20 @@ else if (exponent_x < 0) long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CX = _A_w0; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - 
{ - long __CX = _B_w0; - long __CY = _A_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CX = _A_w0; - long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CX = _A_w1; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -731,45 +578,11 @@ static long get_BID64_small_mantissa(final long sgn, int expon, long 
coeff) { long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - final long __CX = _A; - final long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w1); + _ALBH_w0 = _A * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - final long __CX = _A; - final long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w0); + _ALBL_w0 = _A * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); @@ -881,45 +694,11 @@ public static long get_BID64(long sgn, int expon, long coeff) { long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - final long __CX = _A; - final long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM 
>>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w1); + _ALBH_w0 = _A * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - final long __CX = _A; - final long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w0); + _ALBL_w0 = _A * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); From 11b6e08c0bc3416072be05dc957be2d0221d4fcf Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 18:58:35 +0300 Subject: [PATCH 24/29] Java: Replace some other 64*64 multiplications. 
--- .../java/com/epam/deltix/dfp/JavaImplDiv.java | 4 +- .../java/com/epam/deltix/dfp/JavaImplFma.java | 455 ++---------------- .../com/epam/deltix/dfp/JavaImplParse.java | 76 +-- .../com/epam/deltix/dfp/JavaImplRound.java | 170 +------ 4 files changed, 82 insertions(+), 623 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java index 5a205292..ce69629f 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java @@ -366,7 +366,7 @@ else if (exponent_x < 0) //__mul_64x64_to_128 (CT, Q, bid_reciprocals10_64[nzeros]); CT_w1 = Mul64Impl.unsignedMultiplyHigh(Q, bid_reciprocals10_64[nzeros]); - CT_w0 = Q * bid_reciprocals10_64[nzeros]; + // CT_w0 = Q * bid_reciprocals10_64[nzeros]; // @optimization // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[nzeros]; @@ -415,7 +415,7 @@ else if (exponent_x < 0) if (nzeros != 0) { //__mul_64x64_to_128 (CT, Q, bid_reciprocals10_64[nzeros]); CT_w1 = Mul64Impl.unsignedMultiplyHigh(Q, bid_reciprocals10_64[nzeros]); - CT_w0 = Q * bid_reciprocals10_64[nzeros]; + // CT_w0 = Q * bid_reciprocals10_64[nzeros]; // @optimization // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[nzeros]; diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java index 12037025..7c6cd70a 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java @@ -369,23 +369,8 @@ else if (exponent_x < 0) // get 128-bit product: coefficient_x*coefficient_y //__mul_64x64_to_128(P, coefficient_x, coefficient_y); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_x >>> 32; - __CXL = LONG_LOW_PART & coefficient_x; - __CYH = 
coefficient_y >>> 32; - __CYL = LONG_LOW_PART & coefficient_y; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P_w1 = __PH + (__PM >>> 32); - P_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + P_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, coefficient_y); + P_w0 = coefficient_x * coefficient_y; // tighten binary range of P: leading bit is 2^bp @@ -465,22 +450,9 @@ else if (exponent_x < 0) // align coeff_x, CYh //__mul_64x64_to_128(CZ, coefficient_z, bid_power10_table_128_flat[(extra_digits << 1) /*+ 0*/]); { - long __CY = bid_power10_table_128_BID_UINT128[(extra_digits << 1) /*+ 0*/]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_z >>> 32; - __CXL = LONG_LOW_PART & coefficient_z; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CZ_w1 = __PH + (__PM >>> 32); - CZ_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_power10_table_128_BID_UINT128[(extra_digits << 1) /*+ 0*/]; + CZ_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_z, __CY); + CZ_w0 = coefficient_z * __CY; } if (sign_z == (sign_y ^ sign_x)) { @@ -755,22 +727,9 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_a, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_a >>> 32; - __CXL = LONG_LOW_PART & coefficient_a; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM 
= (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_a, __CY); + CT_w0 = coefficient_a * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -825,22 +784,9 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_b >>> 32; - __CXL = LONG_LOW_PART & coefficient_b; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -863,24 +809,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 saved_ca = saved_ca + T1; //__mul_64x64_to_128(CA, saved_ca, 0x3333333333333334L); - { - long __CY = 0x3333333333333334L; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = saved_ca >>> 32; - __CXL = LONG_LOW_PART & saved_ca; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = 
Mul64Impl.unsignedMultiplyHigh(saved_ca, 0x3333333333333334L); + // CA_w0 = saved_ca * 0x3333333333333334L; // @optimization //reciprocals10_64[1]); coefficient_a = CA_w1 >>> 1; @@ -902,22 +832,9 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_b >>> 32; - __CXL = LONG_LOW_PART & coefficient_b; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -940,21 +857,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 //__mul_64x64_to_128(CT_new, coefficient_b, bid_reciprocals10_64[extra_digits]); { long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_b >>> 32; - __CXL = LONG_LOW_PART & coefficient_b; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_new_w1 = __PH + (__PM >>> 32); - CT_new_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + CT_new_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_new_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ 
-1046,80 +950,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, 
__PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -1266,80 +1110,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, 
__PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -1441,80 +1225,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, 
__CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -1690,80 +1414,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, 
__CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; 
- __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -2169,23 +1833,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 final long /*BID_UINT64*/ ALBH_L = S * CY_w1; //__mul_64x64_to_128(CY_L, S, CY_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = S >>> 32; - __CXL = LONG_LOW_PART & S; - __CYH = CY_w0 >>> 32; - __CYL = LONG_LOW_PART & CY_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CY_L_w1 = __PH + (__PM >>> 32); - CY_L_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CY_L_w1 = Mul64Impl.unsignedMultiplyHigh(S, CY_w0); + CY_L_w0 = S * CY_w0; CY_L_w1 += ALBH_L; } diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java index f82b3788..8fab574f 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java @@ -509,41 +509,11 @@ public static long get_BID64_UF(final long sgn, final int expon, long coeff, fin long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C128_w0 >>> 32; - __CXL = LONG_LOW_PART & C128_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - 
_ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(C128_w0, _B_w1); + _ALBH_w0 = C128_w0 * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C128_w0 >>> 32; - __CXL = LONG_LOW_PART & C128_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(C128_w0, _B_w0); + _ALBL_w0 = C128_w0 * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); @@ -688,41 +658,11 @@ public static long get_BID64(long sgn, int expon, long coeff, int rmode, Floatin long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coeff >>> 32; - __CXL = LONG_LOW_PART & coeff; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(coeff, _B_w1); + _ALBH_w0 = coeff * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coeff >>> 32; - __CXL = LONG_LOW_PART & coeff; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 
32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(coeff, _B_w0); + _ALBL_w0 = coeff * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java index 64bf8aa1..5bb970c4 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java @@ -137,21 +137,8 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // if (0 < f* < 10^(-x)) then the result is a midpoint @@ -240,21 +227,8 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // if (0 < f* < 10^(-x)) then the result 
is a midpoint @@ -332,21 +306,8 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -398,21 +359,8 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -464,21 +412,8 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = 
__CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -607,21 +542,8 @@ public static long bid64_round_integral_nearest_even(long /*BID_UINT64*/ x, fina //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // if (0 < f* < 10^(-x)) then the result is a midpoint @@ -756,21 +678,8 @@ public static long bid64_round_integral_negative(long /*BID_UINT64*/ x, final in //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -905,21 +814,8 @@ public static long bid64_round_integral_positive(long /*BID_UINT64*/ x, final in //__mul_64x64_to_128(P128, C1, 
bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -1048,21 +944,8 @@ public static long bid64_round_integral_zero(long /*BID_UINT64*/ x, final int rn //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -1186,21 +1069,8 @@ public static long bid64_round_integral_nearest_away(long /*BID_UINT64*/ x, fina //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 
= (__PM << 32) + (LONG_LOW_PART & __PL); + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + // P128_w0 = C1 * __CY; // @optimization } // if (0 < f* < 10^(-x)) then the result is a midpoint From 8593ac487ad29583cd3858fa8929335ae1661c67 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 19:00:43 +0300 Subject: [PATCH 25/29] Java: Tiny code optimization and clearing. --- .../java/com/epam/deltix/dfp/JavaImplAdd.java | 28 +++++-- .../java/com/epam/deltix/dfp/JavaImplCmp.java | 77 +++++++++++++------ .../com/epam/deltix/dfp/JavaImplMinMax.java | 28 +++++-- 3 files changed, 95 insertions(+), 38 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java index 16b270da..cb648b7f 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java @@ -386,8 +386,11 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_a, bid_reciprocals10_64[extra_digits]); - CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_a, bid_reciprocals10_64[extra_digits]); - CT_w0 = coefficient_a * bid_reciprocals10_64[extra_digits]; + { + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_a, __CY); + CT_w0 = coefficient_a * __CY; + } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; @@ -425,8 +428,11 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); - CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, bid_reciprocals10_64[extra_digits]); - CT_w0 = coefficient_b * bid_reciprocals10_64[extra_digits]; + { + final long __CY = 
bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; + } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; @@ -461,8 +467,11 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); - CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, bid_reciprocals10_64[extra_digits]); - CT_w0 = coefficient_b * bid_reciprocals10_64[extra_digits]; + { + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; + } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; C0_64 = CT_w1 >>> amount; @@ -479,8 +488,11 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT_new, coefficient_b, bid_reciprocals10_64[extra_digits]); - CT_new_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, bid_reciprocals10_64[extra_digits]); - CT_new_w0 = coefficient_b * bid_reciprocals10_64[extra_digits]; + { + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_new_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_new_w0 = coefficient_b * __CY; + } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; C0_64 = CT_new_w1 >>> amount; diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java index 50c56eb3..b87963c3 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java @@ -133,8 +133,11 @@ public static int compare(final long /*BID_UINT64*/ x, 
final long /*BID_UINT64*/ if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); - sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; + { + final long __CY = bid_mult_factor[exp_x - exp_y]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; + } // if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -147,8 +150,11 @@ public static int compare(final long /*BID_UINT64*/ x, final long /*BID_UINT64*/ } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); - sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; + { + final long __CY = bid_mult_factor[exp_y - exp_x]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; + } // if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -360,8 +366,11 @@ public static boolean bid64_quiet_greater(final long /*BID_UINT64*/ x, final lon if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); - sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; + { + final long __CY = bid_mult_factor[exp_x - exp_y]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + 
sig_n_prime_w0 = sig_x * __CY; + } // if postitive, return whichever significand is larger (converse if neg.) if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -372,8 +381,11 @@ public static boolean bid64_quiet_greater(final long /*BID_UINT64*/ x, final lon } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); - sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; + { + final long __CY = bid_mult_factor[exp_y - exp_x]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; + } // if positive, return whichever significand is larger // (converse if negative) @@ -489,8 +501,11 @@ public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); - sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; + { + final long __CY = bid_mult_factor[exp_x - exp_y]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; + } // return 1 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -503,8 +518,11 @@ public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); 
- sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; + { + final long __CY = bid_mult_factor[exp_y - exp_x]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; + } // return 0 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -623,8 +641,11 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); - sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; + { + final long __CY = bid_mult_factor[exp_x - exp_y]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; + } // return 0 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -637,8 +658,11 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); - sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; + { + final long __CY = bid_mult_factor[exp_y - exp_x]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; + } // return 0 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -756,8 +780,11 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, 
bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); - sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; + { + final long __CY = bid_mult_factor[exp_x - exp_y]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; + } // return 1 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { @@ -770,8 +797,11 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); - sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; + { + final long __CY = bid_mult_factor[exp_y - exp_x]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; + } // return 1 if values are equal if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -926,8 +956,11 @@ public static boolean bid64_isNormal(final long /*BID_UINT64*/ x) { // if (exp_x - 398 = -383) the number may be subnormal if (exp_x < 15) { // __mul_64x64_to_128MACH (sig_x_prime, sig_x, bid_mult_factor[exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_x_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x]); - sig_x_prime_w0 = sig_x * bid_mult_factor[exp_x]; + { + final long __CY = bid_mult_factor[exp_x]; + sig_x_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_x_prime_w0 = sig_x * __CY; + } // normal return sig_x_prime_w1 != 0 || UnsignedLong.isGreater(sig_x_prime_w0, 999999999999999L); diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java 
b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java index 69195e1e..5a2af8fd 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java @@ -142,8 +142,11 @@ public static long bid64_min_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); - sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; + { + final long __CY = bid_mult_factor[exp_x - exp_y]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; + } // if postitive, return whichever significand is larger @@ -157,8 +160,11 @@ public static long bid64_min_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); - sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; + { + final long __CY = bid_mult_factor[exp_y - exp_x]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; + } // if postitive, return whichever significand is larger (converse if negative) if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { @@ -302,8 +308,11 @@ public static long bid64_max_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ if (exp_x > exp_y) { // to simplify the loop below, // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as 
__mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, bid_mult_factor[exp_x - exp_y]); - sig_n_prime_w0 = sig_x * bid_mult_factor[exp_x - exp_y]; + { + final long __CY = bid_mult_factor[exp_x - exp_y]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; + } // if postitive, return whichever significand is larger // (converse if negative) @@ -315,8 +324,11 @@ public static long bid64_max_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 - sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, bid_mult_factor[exp_y - exp_x]); - sig_n_prime_w0 = sig_y * bid_mult_factor[exp_y - exp_x]; + { + final long __CY = bid_mult_factor[exp_y - exp_x]; + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; + } // if postitive, return whichever significand is larger (converse if negative) if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_x)) { From 97bf452513594c32ebb521f6c245d7b80f14c99b Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 19:04:44 +0300 Subject: [PATCH 26/29] Java: Enhance code. 
--- .../java/com/epam/deltix/dfp/JavaImplFma.java | 2 +- .../com/epam/deltix/dfp/JavaImplRound.java | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java index 7c6cd70a..a76f8e84 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java @@ -856,7 +856,7 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT_new, coefficient_b, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; + final long __CY = bid_reciprocals10_64[extra_digits]; CT_new_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); CT_new_w0 = coefficient_b * __CY; } diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java index 5bb970c4..e661dfa3 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java @@ -136,7 +136,7 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -226,7 +226,7 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -305,7 +305,7 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); 
{ - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -358,7 +358,7 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -411,7 +411,7 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -541,7 +541,7 @@ public static long bid64_round_integral_nearest_even(long /*BID_UINT64*/ x, fina //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -677,7 +677,7 @@ public static long bid64_round_integral_negative(long /*BID_UINT64*/ x, final in //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -813,7 +813,7 @@ public static long bid64_round_integral_positive(long /*BID_UINT64*/ x, final in //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -943,7 +943,7 @@ public static long bid64_round_integral_zero(long /*BID_UINT64*/ x, final int rn //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = 
Mul64Impl.unsignedMultiplyHigh(C1, __CY); P128_w0 = C1 * __CY; } @@ -1068,7 +1068,7 @@ public static long bid64_round_integral_nearest_away(long /*BID_UINT64*/ x, fina //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; + final long __CY = bid_ten2mk64[ind - 1]; P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); // P128_w0 = C1 * __CY; // @optimization } From 31e0f94b41a78ec8610daac0ca435b6e26cb3a58 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 19:23:40 +0300 Subject: [PATCH 27/29] Java: Replace next part of 64*64 multiplications --- .../com/epam/deltix/dfp/JavaImplCast.java | 64 ++---- .../epam/deltix/dfp/JavaImplCastBinary64.java | 204 +++--------------- .../java/com/epam/deltix/dfp/JavaImplFma.java | 45 +--- 3 files changed, 42 insertions(+), 271 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java index 6dedbc61..0bf2e9a0 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java @@ -92,19 +92,9 @@ private JavaImplCast() { //__mul_64x64_to_128MACH(C, C1, bid_ten2k64[20 - q]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C1 >>> 32; - CXL = LONG_LOW_PART & C1; - CYH = bid_ten2k64[20 - q] >>> 32; - CYL = LONG_LOW_PART & bid_ten2k64[20 - q]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - C_w1 = PH + (PM >>> 32); - C_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_ten2k64[20 - q]; + C_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + C_w0 = C1 * __CY; } // Note: C1 * 10^(11-q) has 19 or 20 digits; 0x5000000000000000a, has 20 @@ -128,19 +118,9 @@ private JavaImplCast() { //__mul_64x64_to_128MACH(C, C1, bid_ten2k64[20 - q]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C1 
>>> 32; - CXL = LONG_LOW_PART & C1; - CYH = bid_ten2k64[20 - q] >>> 32; - CYL = LONG_LOW_PART & bid_ten2k64[20 - q]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - C_w1 = PH + (PM >>> 32); - C_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_ten2k64[20 - q]; + C_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + C_w0 = C1 * __CY; } if (UnsignedLong.isGreaterOrEqual(C_w1, 0x05L)) { @@ -180,19 +160,9 @@ private JavaImplCast() { //__mul_64x64_to_128MACH(P128, C1, bid_ten2mk64[ind - 1]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C1 >>> 32; - CXL = LONG_LOW_PART & C1; - CYH = bid_ten2mk64[ind - 1] >>> 32; - CYL = LONG_LOW_PART & bid_ten2mk64[ind - 1]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - P128_w1 = PH + (PM >>> 32); - P128_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } Cstar = P128_w1; @@ -624,19 +594,9 @@ public static long bid64_from_int64(final long /*BID_SINT64*/ x, final int rnd_m //__mul_64x64_to_128MACH(__P128, C, bid_Kx64[__ind]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C >>> 32; - CXL = LONG_LOW_PART & C; - CYH = bid_Kx64[__ind] >>> 32; - CYL = LONG_LOW_PART & bid_Kx64[__ind]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - __P128_w1 = PH + (PM >>> 32); - __P128_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_Kx64[__ind]; + __P128_w1 = Mul64Impl.unsignedMultiplyHigh(C, __CY); + __P128_w0 = C * __CY; } // calculate C* = floor (__P128) and f* diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java 
b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java index 5319855f..b479dda2 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java @@ -200,66 +200,14 @@ public static long binary64_to_bid64(double x, final int rnd_mode/*, final JavaI { long /*BID_UINT128*/ lP0_w0, lP0_w1, lP1_w0, lP1_w1, lP2_w0, lP2_w1, lP3_w0, lP3_w1; long /*BID_UINT64*/ lC; - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w0 >>> 32; - CYL = LONG_LOW_PART & r_w0; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP0_w1 = PH + (PM >>> 32); - lP0_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w1 >>> 32; - CYL = LONG_LOW_PART & r_w1; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP1_w1 = PH + (PM >>> 32); - lP1_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w2 >>> 32; - CYL = LONG_LOW_PART & r_w2; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP2_w1 = PH + (PM >>> 32); - lP2_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w3 >>> 32; - CYL = LONG_LOW_PART & r_w3; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP3_w1 = PH + (PM >>> 32); - lP3_w0 = (PM << 32) + (LONG_LOW_PART & 
PL); - } + lP0_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w0); + lP0_w0 = c_w0 * r_w0; + lP1_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w1); + lP1_w0 = c_w0 * r_w1; + lP2_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w2); + lP2_w0 = c_w0 * r_w2; + lP3_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w3); + lP3_w0 = c_w0 * r_w3; P0_w0 = lP0_w0; { long /*BID_UINT64*/ X1 = lP1_w0; @@ -281,66 +229,14 @@ public static long binary64_to_bid64(double x, final int rnd_mode/*, final JavaI { long /*BID_UINT128*/ lP0_w0, lP0_w1, lP1_w0, lP1_w1, lP2_w0, lP2_w1, lP3_w0, lP3_w1; long /*BID_UINT64*/ lC; - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w0 >>> 32; - CYL = LONG_LOW_PART & r_w0; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP0_w1 = PH + (PM >>> 32); - lP0_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w1 >>> 32; - CYL = LONG_LOW_PART & r_w1; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP1_w1 = PH + (PM >>> 32); - lP1_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w2 >>> 32; - CYL = LONG_LOW_PART & r_w2; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP2_w1 = PH + (PM >>> 32); - lP2_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w3 >>> 32; - CYL = LONG_LOW_PART & r_w3; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 
32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP3_w1 = PH + (PM >>> 32); - lP3_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP0_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w0); + lP0_w0 = c_w1 * r_w0; + lP1_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w1); + lP1_w0 = c_w1 * r_w1; + lP2_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w2); + lP2_w0 = c_w1 * r_w2; + lP3_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w3); + lP3_w0 = c_w1 * r_w3; P1_w0 = lP0_w0; { long /*BID_UINT64*/ X1 = lP1_w0; @@ -510,69 +406,17 @@ public static double bid64_to_binary64(final long /*BID_UINT64*/ x, final int rn long /*BID_UINT128*/ lP0_w0, lP0_w1, lP1_w0, lP1_w1, lP2_w0, lP2_w1, lP3_w0, lP3_w1; long /*BID_UINT64*/ lC; //__mul_64x64_to_128(lP0, c_w1, r_w0); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w0 >>> 32; - CYL = LONG_LOW_PART & r_w0; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP0_w1 = PH + (PM >>> 32); - lP0_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP0_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w0); + lP0_w0 = c_w1 * r_w0; //__mul_64x64_to_128(lP1, c_w1, r_w1); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w1 >>> 32; - CYL = LONG_LOW_PART & r_w1; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP1_w1 = PH + (PM >>> 32); - lP1_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP1_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w1); + lP1_w0 = c_w1 * r_w1; //__mul_64x64_to_128(lP2, c_w1, r_w2); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w2 >>> 32; - CYL = LONG_LOW_PART & r_w2; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH 
+= PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP2_w1 = PH + (PM >>> 32); - lP2_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP2_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w2); + lP2_w0 = c_w1 * r_w2; //__mul_64x64_to_128(lP3, c_w1, r_w3); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w3 >>> 32; - CYL = LONG_LOW_PART & r_w3; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP3_w1 = PH + (PM >>> 32); - lP3_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP3_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w3); + lP3_w0 = c_w1 * r_w3; z_w0 = lP0_w0; //__add_carry_out(P_w1,lC,lP1_w0,lP0_w1); { diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java index a76f8e84..4716d528 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java @@ -1595,19 +1595,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 //__mul_64x64_to_128(CX, coefficient_x, bid_power10_table_128_flat[(diff_dec_expon << 1) /*+ 0*/]); { final long /*BID_UINT64*/ __CY = bid_power10_table_128_BID_UINT128[(diff_dec_expon << 1) /*+ 0*/]; - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = coefficient_x >>> 32; - CXL = LONG_LOW_PART & coefficient_x; - CYH = __CY >>> 32; - CYL = LONG_LOW_PART & __CY; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - CX_w1 = PH + (PM >>> 32); - CX_w0 = (PM << 32) + (LONG_LOW_PART & PL); + CX_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, __CY); + CX_w0 = coefficient_x * __CY; } if (sign_x == sign_y) { @@ -1717,19 +1706,8 @@ else if (UnsignedLong.isLess(coefficient_a, 
bid_power10_table_128_BID_UINT128[(1 //__mul_64x64_to_128(CX, coefficient_x, bid_power10_table_128_flat[(diff_dec2 << 1) /*+ 0*/]); { long /*BID_UINT64*/ __CY = bid_power10_table_128_BID_UINT128[(diff_dec2 << 1) /*+ 0*/]; - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = coefficient_x >>> 32; - CXL = LONG_LOW_PART & coefficient_x; - CYH = __CY >>> 32; - CYL = LONG_LOW_PART & __CY; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - CX_w1 = PH + (PM >>> 32); - CX_w0 = (PM << 32) + (LONG_LOW_PART & PL); + CX_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, __CY); + CX_w0 = coefficient_x * __CY; } @@ -1873,19 +1851,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 //__mul_64x64_to_128(F, coefficient_y, S); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = coefficient_y >>> 32; - CXL = LONG_LOW_PART & coefficient_y; - CYH = S >>> 32; - CYL = LONG_LOW_PART & S; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - F_w1 = PH + (PM >>> 32); - F_w0 = (PM << 32) + (LONG_LOW_PART & PL); + F_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_y, S); + F_w0 = coefficient_y * S; } // fraction From 44dff610d1e08f4e53e6b893181d22b3d774e148 Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 19:48:52 +0300 Subject: [PATCH 28/29] Java: Replace last 64*64 multiplications. 
--- .../java/com/epam/deltix/dfp/JavaImpl.java | 92 +++---------------- 1 file changed, 12 insertions(+), 80 deletions(-) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java index 7868887c..5afc265e 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java @@ -1708,48 +1708,14 @@ private static long packUnderflow(final boolean isSigned, final int exponent, lo final long QM2_0; final long QM2_1; { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = (C128_0) >>> 32; - CXL = C128_0 & UINT32_MAX; - CYH = (bid_reciprocals10_128[extra_digits][1]) >>> 32; - CYL = bid_reciprocals10_128[extra_digits][1] & UINT32_MAX; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & UINT32_MAX) + PM2 + (PL >>> 32); - ALBH_1 = PH + (PM >>> 32); - ALBH_0 = (PM << 32) + (PL & UINT32_MAX); + final long __CY = bid_reciprocals10_128[extra_digits][1]; + ALBH_1 = Mul64Impl.unsignedMultiplyHigh(C128_0, __CY); + ALBH_0 = C128_0 * __CY; } { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = ((C128_0)) >>> 32; - CXL = C128_0 & UINT32_MAX; - CYH = (bid_reciprocals10_128[extra_digits][0]) >>> 32; - CYL = bid_reciprocals10_128[extra_digits][0] & UINT32_MAX; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & UINT32_MAX) + PM2 + (PL >>> 32); - ALBL_1 = PH + (PM >>> 32); - ALBL_0 = (PM << 32) + (PL & UINT32_MAX); + final long __CY = bid_reciprocals10_128[extra_digits][0]; + ALBL_1 = Mul64Impl.unsignedMultiplyHigh(C128_0, __CY); + ALBL_0 = C128_0 * __CY; } Q_low_0 = ALBL_0; { @@ -2091,48 +2057,14 @@ public static long pack(final long signMask, final int exponentIn, final long co 
final long QM2_0; final long QM2_1; { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = coefficient >>> 32; - CXL = (int) ((coefficient)); - CYH = bid_reciprocals10_128[extra_digits][1] >>> 32; - CYL = (int) bid_reciprocals10_128[extra_digits][1]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & 0xFFFFFFFFL) + PM2 + (PL >> 32); - ALBH_1 = PH + (PM >> 32); - ALBH_0 = (PM << 32) + (int) PL; + final long __CY = bid_reciprocals10_128[extra_digits][1]; + ALBH_1 = Mul64Impl.unsignedMultiplyHigh(coefficient, __CY); + ALBH_0 = coefficient * __CY; } { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = ((coefficient)) >>> 32; - CXL = (int) ((coefficient)); - CYH = bid_reciprocals10_128[extra_digits][0] >>> 32; - CYL = (int) bid_reciprocals10_128[extra_digits][0]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & 0xFFFFFFFFL) + PM2 + (PL >>> 32); - ALBL_1 = PH + (PM >>> 32); - ALBL_0 = (PM << 32) + (PL & 0xFFFFFFFFL); + final long __CY = bid_reciprocals10_128[extra_digits][0]; + ALBL_1 = Mul64Impl.unsignedMultiplyHigh(coefficient, __CY); + ALBL_0 = coefficient * __CY; } Q_low_0 = ALBL_0; { From 6966aff358b4439b1f0d553e3338ed463d1fc77f Mon Sep 17 00:00:00 2001 From: Andrei Davydov Date: Wed, 28 Jun 2023 19:52:24 +0300 Subject: [PATCH 29/29] Update version to 1.0.0 --- gradle.properties | 2 +- .../src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gradle.properties b/gradle.properties index 4a386302..437c347c 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,2 @@ -version=0.12.11-SNAPSHOT +version=1.0.0-SNAPSHOT valueTypesVersion=0.9.4 \ No newline at end of file diff --git 
a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java index 4f354ad0..3c89a747 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java @@ -37,8 +37,8 @@ public void setUp() { @Benchmark public void mul0(Blackhole bh) { for (int i = 0; i < 1000; ++i) { - long __CX = decimalValues[i]; - long __CY = decimalValues[i + 1]; + final long __CX = decimalValues[i]; + final long __CY = decimalValues[i + 1]; long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; __CXH = __CX >>> 32; __CXL = LONG_LOW_PART & __CX;