diff --git a/.github/workflows/Build.yml b/.github/workflows/Build.yml index 0fdc6cb6..c5f881ed 100644 --- a/.github/workflows/Build.yml +++ b/.github/workflows/Build.yml @@ -146,18 +146,22 @@ jobs: uses: actions/setup-java@v3 with: distribution: 'adopt' - java-version: '8' + java-version: '19' - name: build run: | ./gradlew jar ./gradlew javadocJar + ./gradlew copyTestJars - uses: actions/upload-artifact@v3 with: name: java-jars path: | ./*java/dfp/build/libs/* ./*java/dfp-math/build/libs/* - ./*java/decimalDemo/build/libs/* + ./*java/dfp/testLibs/* + ./*java/dfp-math/testLibs/* + ./*java/dfpNativeTests/testLibs/* + ./*java/vtaTest/testLibs/* retention-days: 7 build-dotnet: @@ -190,20 +194,20 @@ jobs: test-java: runs-on: ${{ matrix.os }} - needs: [compress-native] + needs: [build-java] strategy: fail-fast: false matrix: os: [ 'ubuntu-latest', 'windows-2019', 'macos-latest'] - java: [ '8', '11', '15'] + java: [ '8', '11', '19'] steps: - uses: actions/checkout@v3 with: submodules: 'recursive' - - name: Download compress-native artifacts + - name: Download java-jars artifacts uses: actions/download-artifact@v3 with: - name: compress-native + name: java-jars - name: Setup java uses: actions/setup-java@v3 with: @@ -214,15 +218,7 @@ jobs: ./gradlew :java:systemInfo:runSystemInfo - name: test run: | - ./gradlew check - - name: Upload test results on failure - uses: actions/upload-artifact@v3 - if: ${{ failure() }} - with: - path: | - java/dfp/build/reports/* - java/dfp-math/build/reports/* - name: Java-{{ matrix.java }}-${{ matrix.os }}-TestReports + ./gradlew runTestJars test-dotnet: runs-on: ${{ matrix.os }} diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 594cb829..fd7c3672 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -190,18 +190,22 @@ jobs: uses: actions/setup-java@v3 with: distribution: 'adopt' - java-version: '8' + java-version: '19' - name: build run: | ./gradlew jar ./gradlew javadocJar + ./gradlew copyTestJars - uses: actions/upload-artifact@v3 with: name: java-jars path: | ./*java/dfp/build/libs/* ./*java/dfp-math/build/libs/* - ./*java/decimalDemo/build/libs/* + ./*java/dfp/testLibs/* + ./*java/dfp-math/testLibs/* + ./*java/dfpNativeTests/testLibs/* + ./*java/vtaTest/testLibs/* retention-days: 7 @@ -236,15 +240,15 @@ jobs: test-java: runs-on: ubuntu-latest - needs: [compress-native] + needs: [build-java] steps: - uses: actions/checkout@v3 with: submodules: 'recursive' - - name: Download compress-native artifacts + - name: Download java-jars artifacts uses: actions/download-artifact@v3 with: - name: compress-native + name: java-jars - name: Setup java uses: actions/setup-java@v3 with: @@ -255,7 +259,7 @@ jobs: ./gradlew :java:systemInfo:runSystemInfo - name: test run: | - ./gradlew check + ./gradlew runTestJars test-dotnet: diff --git a/gradle.properties b/gradle.properties index 4a386302..437c347c 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,2 @@ -version=0.12.11-SNAPSHOT +version=1.0.0-SNAPSHOT valueTypesVersion=0.9.4 \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 2e6e5897..774fae87 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.1-bin.zip 
zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/java/dfp-math/build.gradle b/java/dfp-math/build.gradle index ef9ac990..f78c0dbf 100644 --- a/java/dfp-math/build.gradle +++ b/java/dfp-math/build.gradle @@ -9,11 +9,7 @@ if (isReleaseVersion) { group = 'com.epam.deltix' -sourceCompatibility = 1.7 -compileTestJava { - sourceCompatibility = "1.8" - targetCompatibility = "1.8" -} +sourceCompatibility = 8 repositories { mavenCentral() @@ -134,3 +130,30 @@ jar { sourcesJar { duplicatesStrategy = DuplicatesStrategy.EXCLUDE } + +task testsJar(type: Jar, dependsOn: [jar, testClasses, processTestResources]) { + archiveClassifier = 'tests' + from sourceSets.test.output +} + +task copyTestDeps(type: Copy) { + from(sourceSets.test.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(jar.outputs.files) + from(testsJar.outputs.files) + into('testLibs') +} + +task runTestJars(type: JavaExec) { + mainClass = 'org.junit.runner.JUnitCore' + classpath = files { file('testLibs').listFiles() } + + def testClassesRoot = file('src/test/java').absolutePath + fileTree(dir: testClassesRoot, include: '**/*Test.java').each { File file -> + def ap = file.absolutePath + args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') + } +} diff --git a/java/dfp/build.gradle b/java/dfp/build.gradle index 33501ae1..68988dc8 100644 --- a/java/dfp/build.gradle +++ b/java/dfp/build.gradle @@ -9,10 +9,37 @@ if (isReleaseVersion) { group = 'com.epam.deltix' -sourceCompatibility = 1.7 +sourceSets { + java9 { + java { + srcDirs = ['src/main/java9'] + } + } + + java18 { + java { + srcDirs = ['src/main/java18'] + } + } +} + +compileJava { + sourceCompatibility = 1.7 + targetCompatibility = 1.7 +} compileTestJava { - sourceCompatibility = "1.8" - targetCompatibility = "1.8" + sourceCompatibility = 8 + targetCompatibility = 8 +} + +compileJava9Java { + sourceCompatibility = 9 + targetCompatibility = 9 +} + +compileJava18Java { + sourceCompatibility = 18 + targetCompatibility = 18 } repositories { @@ -25,6 +52,9 @@ java { } dependencies { + java9Implementation files(sourceSets.main.output.classesDirs) { builtBy compileJava } + java18Implementation files(sourceSets.main.output.classesDirs) { builtBy compileJava } + testImplementation project(':java:testUtils') testImplementation 'junit:junit:4.+' testImplementation 'org.apache.commons:commons-math3:3.6.1' @@ -97,8 +127,15 @@ if (isReleaseVersion) { } jar { + into('META-INF/versions/9') { + from sourceSets.java9.output + } + into('META-INF/versions/18') { + from sourceSets.java18.output + } manifest { - attributes 'Implementation-Title': 'Deltix Decimal Floating-Point Arithmetic Library', + attributes 'Multi-Release': 'true', + 'Implementation-Title': 'Deltix Decimal Floating-Point Arithmetic Library', 'Implementation-Version': archiveVersion, 'Main-Class': 'com.epam.deltix.dfp.Demo' } @@ -110,3 +147,29 @@ jar { } } +task testsJar(type: Jar, dependsOn: [jar, testClasses, processTestResources]) { + archiveClassifier = 'tests' + from sourceSets.test.output +} + +task copyTestDeps(type: Copy) { + from(sourceSets.test.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(jar.outputs.files) + from(testsJar.outputs.files) + into('testLibs') +} + +task runTestJars(type: JavaExec) { + mainClass = 'org.junit.runner.JUnitCore' + classpath = files { 
file('testLibs').listFiles() } + + def testClassesRoot = file('src/test/java').absolutePath + fileTree(dir: testClassesRoot, include: '**/*Test.java').each { File file -> + def ap = file.absolutePath + args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') + } +} diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java index c6085f16..5afc265e 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImpl.java @@ -29,6 +29,12 @@ private JavaImpl() { public static final int MIN_EXPONENT = -383; public static final int MAX_EXPONENT = 384; + // See the https://www.agner.org/optimize/optimizing_assembly.pdf part 16.8 Division (all processors) + public static final long FAST_DIV10_RECIPROCAL = 0xCCCCCCCDL; // (2^FAST_DIV10_SHIFT) / 10 + public static final int FAST_DIV10_SHIFT = 35; + public static final long FAST_DIV10_MUL10_MASK = 0x780000000L; // Highest nibble shifted out by FAST_DIV10_SHIFT + // (((1L << FAST_DIV10_SHIFT) - 1) >> (FAST_DIV10_SHIFT - 4)) << (FAST_DIV10_SHIFT - 4) + public static long fromInt32(final int value) { final long longValue = value; // Fixes -Integer.MIN_VALUE return value >= 0 ? (0x31C00000L << 32) | longValue : (0xB1C00000L << 32) | -longValue; @@ -161,15 +167,35 @@ public static long canonizeFinite(final long value) { if (coefficient == 0) return ZERO; - long div10 = coefficient / 10; - if (div10 * 10 != coefficient) - return value; - - do { - coefficient = div10; - div10 /= 10; - ++exponent; - } while (div10 * 10 == coefficient); + if ((int) coefficient == coefficient) { + long p = coefficient * FAST_DIV10_RECIPROCAL; + if ((p & FAST_DIV10_MUL10_MASK) != 0) + return value; + do { + coefficient = p >> FAST_DIV10_SHIFT; + p = coefficient * FAST_DIV10_RECIPROCAL; + ++exponent; + } while ((p & FAST_DIV10_MUL10_MASK) == 0); + } else { + long div10 = coefficient / 10; + if (div10 * 10 != coefficient) + return value; + do { + if ((int) div10 == div10) { + long p; + do { + coefficient = div10; + p = coefficient * FAST_DIV10_RECIPROCAL; + div10 = p >> FAST_DIV10_SHIFT; + ++exponent; + } while ((p & FAST_DIV10_MUL10_MASK) == 0); + break; + } + coefficient = div10; + div10 /= 10; + ++exponent; + } while (div10 * 10 == coefficient); + } return pack(signMask, exponent, coefficient, BID_ROUNDING_TO_NEAREST); } @@ -315,6 +341,17 @@ public static long fromDecimalDouble(final double x) { return y; for (long n = m; ; ) { + if ((int) n == n) { + long p; + while (true) { + p = n * FAST_DIV10_RECIPROCAL; + final long m10 = p >> FAST_DIV10_SHIFT; + if ((p & FAST_DIV10_MUL10_MASK) != 0) + return signAndExp + n; + n = m10; + signAndExp += 1L << EXPONENT_SHIFT_SMALL; + } + } final long m10 = n / 10; if (m10 * 10 != n) return signAndExp + n; @@ -1671,48 +1708,14 @@ private static long packUnderflow(final boolean isSigned, final int exponent, lo final long QM2_0; final long QM2_1; { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = (C128_0) >>> 32; - CXL = C128_0 & UINT32_MAX; - CYH = (bid_reciprocals10_128[extra_digits][1]) >>> 32; - CYL = bid_reciprocals10_128[extra_digits][1] & UINT32_MAX; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & UINT32_MAX) + PM2 + (PL >>> 32); - ALBH_1 = PH + (PM >>> 32); - ALBH_0 = (PM << 32) + (PL & UINT32_MAX); + final long __CY = 
bid_reciprocals10_128[extra_digits][1]; + ALBH_1 = Mul64Impl.unsignedMultiplyHigh(C128_0, __CY); + ALBH_0 = C128_0 * __CY; } { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = ((C128_0)) >>> 32; - CXL = C128_0 & UINT32_MAX; - CYH = (bid_reciprocals10_128[extra_digits][0]) >>> 32; - CYL = bid_reciprocals10_128[extra_digits][0] & UINT32_MAX; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & UINT32_MAX) + PM2 + (PL >>> 32); - ALBL_1 = PH + (PM >>> 32); - ALBL_0 = (PM << 32) + (PL & UINT32_MAX); + final long __CY = bid_reciprocals10_128[extra_digits][0]; + ALBL_1 = Mul64Impl.unsignedMultiplyHigh(C128_0, __CY); + ALBL_0 = C128_0 * __CY; } Q_low_0 = ALBL_0; { @@ -2054,48 +2057,14 @@ public static long pack(final long signMask, final int exponentIn, final long co final long QM2_0; final long QM2_1; { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = coefficient >>> 32; - CXL = (int) ((coefficient)); - CYH = bid_reciprocals10_128[extra_digits][1] >>> 32; - CYL = (int) bid_reciprocals10_128[extra_digits][1]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & 0xFFFFFFFFL) + PM2 + (PL >> 32); - ALBH_1 = PH + (PM >> 32); - ALBH_0 = (PM << 32) + (int) PL; + final long __CY = bid_reciprocals10_128[extra_digits][1]; + ALBH_1 = Mul64Impl.unsignedMultiplyHigh(coefficient, __CY); + ALBH_0 = coefficient * __CY; } { - final long CXH; - final long CXL; - final long CYH; - final long CYL; - final long PL; - long PH; - long PM; - final long PM2; - CXH = ((coefficient)) >>> 32; - CXL = (int) ((coefficient)); - CYH = bid_reciprocals10_128[extra_digits][0] >>> 32; - CYL = (int) bid_reciprocals10_128[extra_digits][0]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += (PM >>> 32); - PM = (PM & 0xFFFFFFFFL) + PM2 + (PL >>> 32); - ALBL_1 = PH + (PM >>> 32); - ALBL_0 = (PM << 32) + (PL & 0xFFFFFFFFL); + final long __CY = bid_reciprocals10_128[extra_digits][0]; + ALBL_1 = Mul64Impl.unsignedMultiplyHigh(coefficient, __CY); + ALBL_0 = coefficient * __CY; } Q_low_0 = ALBL_0; { diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java index f01afddd..cb648b7f 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplAdd.java @@ -387,23 +387,9 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_a, bid_reciprocals10_64[extra_digits]); { - long __CX = coefficient_a; - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_a, __CY); + CT_w0 = coefficient_a * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ 
-443,23 +429,9 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); { - final long __CX = coefficient_b; final long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -478,25 +450,8 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // must divide coeff_a by 10 saved_ca = saved_ca + T1; //__mul_64x64_to_128(CA, saved_ca, 0x3333333333333334L); - { - final long __CX = saved_ca; - final long __CY = 0x3333333333333334L; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = Mul64Impl.unsignedMultiplyHigh(saved_ca, 0x3333333333333334L); + CA_w0 = saved_ca * 0x3333333333333334L; //reciprocals10_64[1]); coefficient_a = CA_w1 >>> 1; rem_a = @@ -513,23 +468,9 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); { - final long __CX = coefficient_b; final long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; @@ -548,23 +489,9 @@ else if ((UnsignedLong.isLess(coefficient_a, bid_power10_table_128_w0[18]))) // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT_new, coefficient_b, bid_reciprocals10_64[extra_digits]); { - final long __CX = coefficient_b; final long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_new_w1 = __PH + (__PM >>> 32); - CT_new_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + CT_new_w1 = 
Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_new_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[extra_digits]; diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java index 6dedbc61..0bf2e9a0 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCast.java @@ -92,19 +92,9 @@ private JavaImplCast() { //__mul_64x64_to_128MACH(C, C1, bid_ten2k64[20 - q]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C1 >>> 32; - CXL = LONG_LOW_PART & C1; - CYH = bid_ten2k64[20 - q] >>> 32; - CYL = LONG_LOW_PART & bid_ten2k64[20 - q]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - C_w1 = PH + (PM >>> 32); - C_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_ten2k64[20 - q]; + C_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + C_w0 = C1 * __CY; } // Note: C1 * 10^(11-q) has 19 or 20 digits; 0x5000000000000000a, has 20 @@ -128,19 +118,9 @@ private JavaImplCast() { //__mul_64x64_to_128MACH(C, C1, bid_ten2k64[20 - q]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C1 >>> 32; - CXL = LONG_LOW_PART & C1; - CYH = bid_ten2k64[20 - q] >>> 32; - CYL = LONG_LOW_PART & bid_ten2k64[20 - q]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - C_w1 = PH + (PM >>> 32); - C_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_ten2k64[20 - q]; + C_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + C_w0 = C1 * __CY; } if (UnsignedLong.isGreaterOrEqual(C_w1, 0x05L)) { @@ -180,19 +160,9 @@ private JavaImplCast() { //__mul_64x64_to_128MACH(P128, C1, bid_ten2mk64[ind - 1]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C1 >>> 32; - CXL = LONG_LOW_PART & C1; - CYH = bid_ten2mk64[ind - 1] >>> 32; - CYL = LONG_LOW_PART & bid_ten2mk64[ind - 1]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - P128_w1 = PH + (PM >>> 32); - P128_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } Cstar = P128_w1; @@ -624,19 +594,9 @@ public static long bid64_from_int64(final long /*BID_SINT64*/ x, final int rnd_m //__mul_64x64_to_128MACH(__P128, C, bid_Kx64[__ind]); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = C >>> 32; - CXL = LONG_LOW_PART & C; - CYH = bid_Kx64[__ind] >>> 32; - CYL = LONG_LOW_PART & bid_Kx64[__ind]; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - __P128_w1 = PH + (PM >>> 32); - __P128_w0 = (PM << 32) + (LONG_LOW_PART & PL); + final long __CY = bid_Kx64[__ind]; + __P128_w1 = Mul64Impl.unsignedMultiplyHigh(C, __CY); + __P128_w0 = C * __CY; } // calculate C* = floor (__P128) and f* diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java index 9ede86b7..b479dda2 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java +++ 
b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCastBinary64.java @@ -146,25 +146,8 @@ public static long binary64_to_bid64(double x, final int rnd_mode/*, final JavaI long /*BID_UINT64*/ QM64; //__mul_64x64_to_128(ALBL, cc_w0, pow5_w0); - { - long __CX = cc_w0; - long __CY = pow5_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - ALBL_w1 = __PH + (__PM >>> 32); - ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(cc_w0, pow5_w0); + ALBL_w0 = cc_w0 * pow5_w0; QM64 = pow5_w0 * cc_w1 + cc_w0 * pow5_w1; @@ -217,66 +200,14 @@ public static long binary64_to_bid64(double x, final int rnd_mode/*, final JavaI { long /*BID_UINT128*/ lP0_w0, lP0_w1, lP1_w0, lP1_w1, lP2_w0, lP2_w1, lP3_w0, lP3_w1; long /*BID_UINT64*/ lC; - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w0 >>> 32; - CYL = LONG_LOW_PART & r_w0; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP0_w1 = PH + (PM >>> 32); - lP0_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w1 >>> 32; - CYL = LONG_LOW_PART & r_w1; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP1_w1 = PH + (PM >>> 32); - lP1_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w2 >>> 32; - CYL = LONG_LOW_PART & r_w2; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP2_w1 = PH + (PM >>> 32); - lP2_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w0 >>> 32; - CXL = LONG_LOW_PART & c_w0; - CYH = r_w3 >>> 32; - CYL = LONG_LOW_PART & r_w3; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP3_w1 = PH + (PM >>> 32); - lP3_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP0_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w0); + lP0_w0 = c_w0 * r_w0; + lP1_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w1); + lP1_w0 = c_w0 * r_w1; + lP2_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w2); + lP2_w0 = c_w0 * r_w2; + lP3_w1 = Mul64Impl.unsignedMultiplyHigh(c_w0, r_w3); + lP3_w0 = c_w0 * r_w3; P0_w0 = lP0_w0; { long /*BID_UINT64*/ X1 = lP1_w0; @@ -298,66 +229,14 @@ public static long binary64_to_bid64(double x, final int rnd_mode/*, final JavaI { long /*BID_UINT128*/ lP0_w0, lP0_w1, lP1_w0, lP1_w1, lP2_w0, lP2_w1, lP3_w0, lP3_w1; long /*BID_UINT64*/ lC; - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w0 >>> 32; - CYL = LONG_LOW_PART & r_w0; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP0_w1 = PH + (PM >>> 32); - lP0_w0 = (PM << 
32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w1 >>> 32; - CYL = LONG_LOW_PART & r_w1; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP1_w1 = PH + (PM >>> 32); - lP1_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w2 >>> 32; - CYL = LONG_LOW_PART & r_w2; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP2_w1 = PH + (PM >>> 32); - lP2_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w3 >>> 32; - CYL = LONG_LOW_PART & r_w3; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP3_w1 = PH + (PM >>> 32); - lP3_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP0_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w0); + lP0_w0 = c_w1 * r_w0; + lP1_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w1); + lP1_w0 = c_w1 * r_w1; + lP2_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w2); + lP2_w0 = c_w1 * r_w2; + lP3_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w3); + lP3_w0 = c_w1 * r_w3; P1_w0 = lP0_w0; { long /*BID_UINT64*/ X1 = lP1_w0; @@ -527,69 +406,17 @@ public static double bid64_to_binary64(final long /*BID_UINT64*/ x, final int rn long /*BID_UINT128*/ lP0_w0, lP0_w1, lP1_w0, lP1_w1, lP2_w0, lP2_w1, lP3_w0, lP3_w1; long /*BID_UINT64*/ lC; //__mul_64x64_to_128(lP0, c_w1, r_w0); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w0 >>> 32; - CYL = LONG_LOW_PART & r_w0; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP0_w1 = PH + (PM >>> 32); - lP0_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP0_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w0); + lP0_w0 = c_w1 * r_w0; //__mul_64x64_to_128(lP1, c_w1, r_w1); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w1 >>> 32; - CYL = LONG_LOW_PART & r_w1; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP1_w1 = PH + (PM >>> 32); - lP1_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP1_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w1); + lP1_w0 = c_w1 * r_w1; //__mul_64x64_to_128(lP2, c_w1, r_w2); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w2 >>> 32; - CYL = LONG_LOW_PART & r_w2; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP2_w1 = PH + (PM >>> 32); - lP2_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP2_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w2); + lP2_w0 = c_w1 * r_w2; //__mul_64x64_to_128(lP3, c_w1, r_w3); - { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = c_w1 >>> 32; - CXL = LONG_LOW_PART & c_w1; - CYH = r_w3 >>> 32; - CYL = LONG_LOW_PART & r_w3; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - 
PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - lP3_w1 = PH + (PM >>> 32); - lP3_w0 = (PM << 32) + (LONG_LOW_PART & PL); - } + lP3_w1 = Mul64Impl.unsignedMultiplyHigh(c_w1, r_w3); + lP3_w0 = c_w1 * r_w3; z_w0 = lP0_w0; //__add_carry_out(P_w1,lC,lP1_w0,lP0_w1); { diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java index af75b1e4..b87963c3 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplCmp.java @@ -134,23 +134,9 @@ public static int compare(final long /*BID_UINT64*/ x, final long /*BID_UINT64*/ // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; } // if values are equal @@ -165,23 +151,9 @@ public static int compare(final long /*BID_UINT64*/ x, final long /*BID_UINT64*/ // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_y; final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; } // if values are equal @@ -190,7 +162,7 @@ public static int compare(final long /*BID_UINT64*/ x, final long /*BID_UINT64*/ } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (!x_mask_sign)) ? 
1 : -1; // @AD: TODO: Check this case carefully } @@ -395,52 +367,24 @@ public static boolean bid64_quiet_greater(final long /*BID_UINT64*/ x, final lon // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; } // if postitive, return whichever significand is larger (converse if neg.) if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { return false; } - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || UnsignedLong.isGreater(sig_n_prime_w0, sig_y)) ^ (x_mask_sign)); } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_y; final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; } // if positive, return whichever significand is larger @@ -558,23 +502,9 @@ public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; } // return 1 if values are equal @@ -589,23 +519,9 @@ public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_y; final long __CY = bid_mult_factor[exp_y - exp_x]; 
- long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; } // return 0 if values are equal @@ -614,7 +530,7 @@ public static boolean bid64_quiet_greater_equal(final long /*BID_UINT64*/ x, fin } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (!x_mask_sign)); } @@ -726,23 +642,9 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; } // return 0 if values are equal @@ -757,23 +659,9 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_y; final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; } // return 0 if values are equal @@ -782,7 +670,7 @@ public static boolean bid64_quiet_less(final long /*BID_UINT64*/ x, final long / } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (x_mask_sign)); } @@ -893,23 +781,9 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as 
__mul_64x64_to_128 { - final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; } // return 1 if values are equal @@ -924,23 +798,9 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_y; final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; } // return 1 if values are equal @@ -949,7 +809,7 @@ public static boolean bid64_quiet_less_equal(final long /*BID_UINT64*/ x, final } // if positive, return whichever significand abs is smaller // (converse if negative) - return (((UnsignedLong.isGreater(sig_n_prime_w1, 0)) + return (((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0)) || (UnsignedLong.isLess(sig_x, sig_n_prime_w0))) ^ (x_mask_sign)); } @@ -1097,23 +957,9 @@ public static boolean bid64_isNormal(final long /*BID_UINT64*/ x) { if (exp_x < 15) { // __mul_64x64_to_128MACH (sig_x_prime, sig_x, bid_mult_factor[exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_x_prime_w1 = __PH + (__PM >>> 32); - sig_x_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_x_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_x_prime_w0 = sig_x * __CY; } // normal diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java index e668183a..ce69629f 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplDiv.java @@ -231,25 +231,8 @@ else if (exponent_x < 0) T = bid_power10_table_128_w0[ed1]; //__mul_64x64_to_128 (CA, A, T); - { - long __CX = A; - long __CY = T; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; 
- __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = Mul64Impl.unsignedMultiplyHigh(A, T); + CA_w0 = A * T; Q = 0; diff_expon = diff_expon - ed2; @@ -298,25 +281,8 @@ else if (exponent_x < 0) T = bid_power10_table_128_w0[ed2]; //__mul_64x64_to_128 (CA, R, T); - { - long __CX = R; - long __CY = T; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = Mul64Impl.unsignedMultiplyHigh(R, T); + CA_w0 = R * T; B = coefficient_y; @@ -399,25 +365,8 @@ else if (exponent_x < 0) nzeros = d5; //__mul_64x64_to_128 (CT, Q, bid_reciprocals10_64[nzeros]); - { - long __CX = Q; - long __CY = bid_reciprocals10_64[nzeros]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_w1 = Mul64Impl.unsignedMultiplyHigh(Q, bid_reciprocals10_64[nzeros]); + // CT_w0 = Q * bid_reciprocals10_64[nzeros]; // @optimization // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[nzeros]; @@ -465,25 +414,8 @@ else if (exponent_x < 0) if (nzeros != 0) { //__mul_64x64_to_128 (CT, Q, bid_reciprocals10_64[nzeros]); - { - long __CX = Q; - long __CY = bid_reciprocals10_64[nzeros]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CT_w1 = Mul64Impl.unsignedMultiplyHigh(Q, bid_reciprocals10_64[nzeros]); + // CT_w0 = Q * bid_reciprocals10_64[nzeros]; // @optimization // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 amount = bid_short_recip_scale[nzeros]; @@ -553,45 +485,11 @@ public static long get_BID64_UF(final long sgn, final int expon, long coeff, fin long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - final long __CX = _A; - final long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w1); + _ALBH_w0 = _A * _B_w1; 
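// --- Illustration (not part of the patch): what Mul64Impl.unsignedMultiplyHigh computes ---
// Mul64Impl itself is not shown in this diff. The sketch below reproduces the high
// 64 bits of an unsigned 64x64-bit product, i.e. what the removed __mul_64x64_to_128
// blocks assembled from 32-bit halves. Given the java9/java18 source sets added in
// dfp/build.gradle, the real implementation presumably delegates to Math.multiplyHigh
// (Java 9+) or Math.unsignedMultiplyHigh (Java 18+) on newer runtimes; the class and
// method names below are illustrative only.
public final class UnsignedMul128Sketch {
    /** High 64 bits of the unsigned product x * y (portable pre-Java-9 form). */
    public static long unsignedMultiplyHigh(final long x, final long y) {
        final long xh = x >>> 32, xl = x & 0xFFFFFFFFL;
        final long yh = y >>> 32, yl = y & 0xFFFFFFFFL;
        final long ll = xl * yl;          // bits 0..63 of the partial products
        final long lh = xl * yh;          // contributes to bits 32..95
        final long hl = xh * yl;          // contributes to bits 32..95
        final long hh = xh * yh;          // contributes to bits 64..127
        final long mid = (ll >>> 32) + (hl & 0xFFFFFFFFL) + (lh & 0xFFFFFFFFL);
        return hh + (hl >>> 32) + (lh >>> 32) + (mid >>> 32);
    }
    // The low 64 bits are just x * y with ordinary wrapping multiplication,
    // which is why each replacement above pairs unsignedMultiplyHigh with a plain "a * b".
}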
//__mul_64x64_to_128(out ALBL, A, B.w0); - { - final long __CX = _A; - final long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w0); + _ALBL_w0 = _A * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java index 12037025..4716d528 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplFma.java @@ -369,23 +369,8 @@ else if (exponent_x < 0) // get 128-bit product: coefficient_x*coefficient_y //__mul_64x64_to_128(P, coefficient_x, coefficient_y); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_x >>> 32; - __CXL = LONG_LOW_PART & coefficient_x; - __CYH = coefficient_y >>> 32; - __CYL = LONG_LOW_PART & coefficient_y; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P_w1 = __PH + (__PM >>> 32); - P_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + P_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, coefficient_y); + P_w0 = coefficient_x * coefficient_y; // tighten binary range of P: leading bit is 2^bp @@ -465,22 +450,9 @@ else if (exponent_x < 0) // align coeff_x, CYh //__mul_64x64_to_128(CZ, coefficient_z, bid_power10_table_128_flat[(extra_digits << 1) /*+ 0*/]); { - long __CY = bid_power10_table_128_BID_UINT128[(extra_digits << 1) /*+ 0*/]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_z >>> 32; - __CXL = LONG_LOW_PART & coefficient_z; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CZ_w1 = __PH + (__PM >>> 32); - CZ_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_power10_table_128_BID_UINT128[(extra_digits << 1) /*+ 0*/]; + CZ_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_z, __CY); + CZ_w0 = coefficient_z * __CY; } if (sign_z == (sign_y ^ sign_x)) { @@ -755,22 +727,9 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_a, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_a >>> 32; - __CXL = LONG_LOW_PART & coefficient_a; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_a, __CY); + CT_w0 = 
coefficient_a * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -825,22 +784,9 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_b >>> 32; - __CXL = LONG_LOW_PART & coefficient_b; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -863,24 +809,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 saved_ca = saved_ca + T1; //__mul_64x64_to_128(CA, saved_ca, 0x3333333333333334L); - { - long __CY = 0x3333333333333334L; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = saved_ca >>> 32; - __CXL = LONG_LOW_PART & saved_ca; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CA_w1 = __PH + (__PM >>> 32); - CA_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CA_w1 = Mul64Impl.unsignedMultiplyHigh(saved_ca, 0x3333333333333334L); + // CA_w0 = saved_ca * 0x3333333333333334L; // @optimization //reciprocals10_64[1]); coefficient_a = CA_w1 >>> 1; @@ -902,22 +832,9 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT, coefficient_b, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_b >>> 32; - __CXL = LONG_LOW_PART & coefficient_b; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_w1 = __PH + (__PM >>> 32); - CT_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -939,22 +856,9 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 // get P*(2^M[extra_digits])/10^extra_digits //__mul_64x64_to_128(CT_new, coefficient_b, bid_reciprocals10_64[extra_digits]); { - long __CY = bid_reciprocals10_64[extra_digits]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coefficient_b >>> 32; - __CXL = LONG_LOW_PART & coefficient_b; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CT_new_w1 = __PH + (__PM >>> 32); - CT_new_w0 = 
(__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_reciprocals10_64[extra_digits]; + CT_new_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_b, __CY); + CT_new_w0 = coefficient_b * __CY; } // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 @@ -1046,80 +950,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -1266,80 +1110,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = 
Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -1441,80 +1225,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL 
* __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -1690,80 +1414,20 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _B_w0 >>> 32; - __CXL = LONG_LOW_PART & _B_w0; - __CYH = _A_w1 >>> 32; - __CYL = LONG_LOW_PART & _A_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w0 >>> 32; - __CXL = LONG_LOW_PART & _A_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = _A_w1 >>> 32; - __CXL = LONG_LOW_PART & _A_w1; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -1931,19 +1595,8 @@ else 
if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 //__mul_64x64_to_128(CX, coefficient_x, bid_power10_table_128_flat[(diff_dec_expon << 1) /*+ 0*/]); { final long /*BID_UINT64*/ __CY = bid_power10_table_128_BID_UINT128[(diff_dec_expon << 1) /*+ 0*/]; - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = coefficient_x >>> 32; - CXL = LONG_LOW_PART & coefficient_x; - CYH = __CY >>> 32; - CYL = LONG_LOW_PART & __CY; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - CX_w1 = PH + (PM >>> 32); - CX_w0 = (PM << 32) + (LONG_LOW_PART & PL); + CX_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, __CY); + CX_w0 = coefficient_x * __CY; } if (sign_x == sign_y) { @@ -2053,19 +1706,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 //__mul_64x64_to_128(CX, coefficient_x, bid_power10_table_128_flat[(diff_dec2 << 1) /*+ 0*/]); { long /*BID_UINT64*/ __CY = bid_power10_table_128_BID_UINT128[(diff_dec2 << 1) /*+ 0*/]; - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = coefficient_x >>> 32; - CXL = LONG_LOW_PART & coefficient_x; - CYH = __CY >>> 32; - CYL = LONG_LOW_PART & __CY; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - CX_w1 = PH + (PM >>> 32); - CX_w0 = (PM << 32) + (LONG_LOW_PART & PL); + CX_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, __CY); + CX_w0 = coefficient_x * __CY; } @@ -2169,23 +1811,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 final long /*BID_UINT64*/ ALBH_L = S * CY_w1; //__mul_64x64_to_128(CY_L, S, CY_w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = S >>> 32; - __CXL = LONG_LOW_PART & S; - __CYH = CY_w0 >>> 32; - __CYL = LONG_LOW_PART & CY_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - CY_L_w1 = __PH + (__PM >>> 32); - CY_L_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + CY_L_w1 = Mul64Impl.unsignedMultiplyHigh(S, CY_w0); + CY_L_w0 = S * CY_w0; CY_L_w1 += ALBH_L; } @@ -2224,19 +1851,8 @@ else if (UnsignedLong.isLess(coefficient_a, bid_power10_table_128_BID_UINT128[(1 //__mul_64x64_to_128(F, coefficient_y, S); { - long /*BID_UINT64*/ CXH, CXL, CYH, CYL, PL, PH, PM, PM2; - CXH = coefficient_y >>> 32; - CXL = LONG_LOW_PART & coefficient_y; - CYH = S >>> 32; - CYL = LONG_LOW_PART & S; - PM = CXH * CYL; - PH = CXH * CYH; - PL = CXL * CYL; - PM2 = CXL * CYH; - PH += PM >>> 32; - PM = (LONG_LOW_PART & PM) + PM2 + (PL >>> 32); - F_w1 = PH + (PM >>> 32); - F_w0 = (PM << 32) + (LONG_LOW_PART & PL); + F_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_y, S); + F_w0 = coefficient_y * S; } // fraction diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java index 46fa4ea2..5a2af8fd 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMinMax.java @@ -143,23 +143,9 @@ public static long bid64_min_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { 
- final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; } @@ -169,29 +155,15 @@ public static long bid64_min_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ return y; } - return ((UnsignedLong.isGreater(sig_n_prime_w1, 0) + return ((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0) || UnsignedLong.isGreater(sig_n_prime_w0, sig_y)) ^ ((x & MASK_SIGN) == MASK_SIGN)) ? y : x; } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_y; final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; } // if postitive, return whichever significand is larger (converse if negative) @@ -337,23 +309,9 @@ public static long bid64_max_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ // otherwise adjust the x significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_x, bid_mult_factor[exp_x - exp_y]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_x; final long __CY = bid_mult_factor[exp_x - exp_y]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_x, __CY); + sig_n_prime_w0 = sig_x * __CY; } // if postitive, return whichever significand is larger @@ -361,29 +319,15 @@ public static long bid64_max_fix_nan(long /*BID_UINT64*/ x, long /*BID_UINT64*/ if (sig_n_prime_w1 == 0 && (sig_n_prime_w0 == sig_y)) { return y; } - return ((UnsignedLong.isGreater(sig_n_prime_w1, 0) + return ((/*UnsignedLong.isGreater*/(sig_n_prime_w1 != 0) || UnsignedLong.isGreater(sig_n_prime_w0, sig_y)) ^ ((x & MASK_SIGN) == MASK_SIGN)) ? 
x : y; } // adjust the y significand upwards // __mul_64x64_to_128MACH (sig_n_prime, sig_y, bid_mult_factor[exp_y - exp_x]); // @AD: Note: The __mul_64x64_to_128MACH macro is the same as __mul_64x64_to_128 { - final long __CX = sig_y; final long __CY = bid_mult_factor[exp_y - exp_x]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - sig_n_prime_w1 = __PH + (__PM >>> 32); - sig_n_prime_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + sig_n_prime_w1 = Mul64Impl.unsignedMultiplyHigh(sig_y, __CY); + sig_n_prime_w0 = sig_y * __CY; } // if postitive, return whichever significand is larger (converse if negative) diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java index 6d4c710a..c52797af 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplMul.java @@ -213,25 +213,8 @@ else if (exponent_x < 0) } else { // get 128-bit product: coefficient_x*coefficient_y //__mul_64x64_to_128(P, coefficient_x, coefficient_y); - { - long __CX = coefficient_x; - long __CY = coefficient_y; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P_w1 = __PH + (__PM >>> 32); - P_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + P_w1 = Mul64Impl.unsignedMultiplyHigh(coefficient_x, coefficient_y); + P_w0 = coefficient_x * coefficient_y; // tighten binary range of P: leading bit is 2^bp // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1 @@ -291,88 +274,20 @@ else if (exponent_x < 0) long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CX = _A_w0; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CX = _B_w0; - long __CY = _A_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CX = _A_w0; - long __CY = _B_w0; - long 
__CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CX = _A_w1; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -486,88 +401,20 @@ else if (exponent_x < 0) long _ALBL_w1, _ALBH_w1, _AHBL_w1, _AHBH_w1, _QM_w1, _QM2_w1; //__mul_64x64_to_128(ALBH, (A)_w0, (B)_w1); - { - long __CX = _A_w0; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w1); + _ALBH_w0 = _A_w0 * _B_w1; //__mul_64x64_to_128(AHBL, (B)_w0, (A)_w1); - { - long __CX = _B_w0; - long __CY = _A_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBL_w1 = __PH + (__PM >>> 32); - _AHBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBL_w1 = Mul64Impl.unsignedMultiplyHigh(_B_w0, _A_w1); + _AHBL_w0 = _B_w0 * _A_w1; //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); - { - long __CX = _A_w0; - long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w0, _B_w0); + _ALBL_w0 = _A_w0 * _B_w0; //__mul_64x64_to_128(AHBH, (A)_w1,(B)_w1); - { - long __CX = _A_w1; - long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * 
__CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _AHBH_w1 = __PH + (__PM >>> 32); - _AHBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _AHBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A_w1, _B_w1); + _AHBH_w0 = _A_w1 * _B_w1; //__add_128_128(QM, ALBH, AHBL); // add 128-bit value to 128-bit assume no carry-out { @@ -731,45 +578,11 @@ static long get_BID64_small_mantissa(final long sgn, int expon, long coeff) { long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - final long __CX = _A; - final long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w1); + _ALBH_w0 = _A * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - final long __CX = _A; - final long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w0); + _ALBL_w0 = _A * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); @@ -881,45 +694,11 @@ public static long get_BID64(long sgn, int expon, long coeff) { long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - final long __CX = _A; - final long __CY = _B_w1; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w1); + _ALBH_w0 = _A * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - final long __CX = _A; - final long __CY = _B_w0; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = __CX >>> 32; - __CXL = LONG_LOW_PART & __CX; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(_A, _B_w0); + _ALBL_w0 = _A * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java index f82b3788..8fab574f 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java +++ 
b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplParse.java @@ -509,41 +509,11 @@ public static long get_BID64_UF(final long sgn, final int expon, long coeff, fin long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C128_w0 >>> 32; - __CXL = LONG_LOW_PART & C128_w0; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(C128_w0, _B_w1); + _ALBH_w0 = C128_w0 * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C128_w0 >>> 32; - __CXL = LONG_LOW_PART & C128_w0; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(C128_w0, _B_w0); + _ALBL_w0 = C128_w0 * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); @@ -688,41 +658,11 @@ public static long get_BID64(long sgn, int expon, long coeff, int rmode, Floatin long _ALBL_w0, _ALBL_w1, _ALBH_w0, _ALBH_w1, _QM2_w0, _QM2_w1; //__mul_64x64_to_128(out ALBH, A, B.w1); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coeff >>> 32; - __CXL = LONG_LOW_PART & coeff; - __CYH = _B_w1 >>> 32; - __CYL = LONG_LOW_PART & _B_w1; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBH_w1 = __PH + (__PM >>> 32); - _ALBH_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBH_w1 = Mul64Impl.unsignedMultiplyHigh(coeff, _B_w1); + _ALBH_w0 = coeff * _B_w1; //__mul_64x64_to_128(out ALBL, A, B.w0); - { - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = coeff >>> 32; - __CXL = LONG_LOW_PART & coeff; - __CYH = _B_w0 >>> 32; - __CYL = LONG_LOW_PART & _B_w0; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - _ALBL_w1 = __PH + (__PM >>> 32); - _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); - } + _ALBL_w1 = Mul64Impl.unsignedMultiplyHigh(coeff, _B_w0); + _ALBL_w0 = coeff * _B_w0; Q_low_w0 = _ALBL_w0; //__add_128_64(out QM2, ALBH, ALBL.w1); diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java index 64bf8aa1..e661dfa3 100644 --- a/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/JavaImplRound.java @@ -136,22 +136,9 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - 
__PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // if (0 < f* < 10^(-x)) then the result is a midpoint @@ -239,22 +226,9 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // if (0 < f* < 10^(-x)) then the result is a midpoint @@ -331,22 +305,9 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -397,22 +358,9 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -463,22 +411,9 @@ public static long bid64_round_integral_exact(long /*BID_UINT64*/ x, final int r //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = 
Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -606,22 +541,9 @@ public static long bid64_round_integral_nearest_even(long /*BID_UINT64*/ x, fina //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // if (0 < f* < 10^(-x)) then the result is a midpoint @@ -755,22 +677,9 @@ public static long bid64_round_integral_negative(long /*BID_UINT64*/ x, final in //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -904,22 +813,9 @@ public static long bid64_round_integral_positive(long /*BID_UINT64*/ x, final in //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -1047,22 +943,9 @@ public static long bid64_round_integral_zero(long /*BID_UINT64*/ x, final int rn //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long __CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + P128_w0 = C1 * __CY; } // C* = floor(C*) (logical right shift; C has p decimal digits, @@ -1185,22 +1068,9 @@ public static long bid64_round_integral_nearest_away(long /*BID_UINT64*/ x, fina //__mul_64x64_to_128(P128, C1, bid_ten2mk64[ind - 1]); { - long 
__CY = bid_ten2mk64[ind - 1]; - long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; - __CXH = C1 >>> 32; - __CXL = LONG_LOW_PART & C1; - __CYH = __CY >>> 32; - __CYL = LONG_LOW_PART & __CY; - - __PM = __CXH * __CYL; - __PH = __CXH * __CYH; - __PL = __CXL * __CYL; - __PM2 = __CXL * __CYH; - __PH += (__PM >>> 32); - __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); - - P128_w1 = __PH + (__PM >>> 32); - P128_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + final long __CY = bid_ten2mk64[ind - 1]; + P128_w1 = Mul64Impl.unsignedMultiplyHigh(C1, __CY); + // P128_w0 = C1 * __CY; // @optimization } // if (0 < f* < 10^(-x)) then the result is a midpoint diff --git a/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java new file mode 100644 index 00000000..9f01db5e --- /dev/null +++ b/java/dfp/src/main/java/com/epam/deltix/dfp/Mul64Impl.java @@ -0,0 +1,30 @@ +package com.epam.deltix.dfp; + +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; + +class Mul64Impl { + public static long multiplyHigh(final long __CX, final long __CY) { + //__mul_64x64_to_128(ALBL, (A)_w0, (B)_w0); + { + long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; + __CXH = __CX >>> 32; + __CXL = LONG_LOW_PART & __CX; + __CYH = __CY >>> 32; + __CYL = LONG_LOW_PART & __CY; + + __PM = __CXH * __CYL; + __PH = __CXH * __CYH; + __PL = __CXL * __CYL; + __PM2 = __CXL * __CYH; + __PH += (__PM >>> 32); + __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); + + return /*w1 =*/ __PH + (__PM >>> 32); +// _ALBL_w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + } + } + + public static long unsignedMultiplyHigh(final long A, final long T) { + return multiplyHigh(A, T); + } +} diff --git a/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java new file mode 100644 index 00000000..5f9bbd67 --- /dev/null +++ b/java/dfp/src/main/java18/com/epam/deltix/dfp/Mul64Impl.java @@ -0,0 +1,11 @@ +package com.epam.deltix.dfp; + +class Mul64Impl { + public static long multiplyHigh(final long A, final long T) { + return Math.multiplyHigh(A, T); + } + + public static long unsignedMultiplyHigh(final long A, final long T) { + return Math.unsignedMultiplyHigh(A, T); + } +} diff --git a/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java b/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java new file mode 100644 index 00000000..ec347837 --- /dev/null +++ b/java/dfp/src/main/java9/com/epam/deltix/dfp/Mul64Impl.java @@ -0,0 +1,15 @@ +package com.epam.deltix.dfp; + +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; + +class Mul64Impl { + public static long multiplyHigh(final long A, final long T) { + return Math.multiplyHigh(A, T); + } + + public static long unsignedMultiplyHigh(final long x, final long y) { + return Math.multiplyHigh(x, y) + + (y & (x >> 63)) + // equivalent to `if (x < 0) result += y;` + (x & (y >> 63)); // equivalent to `if (y < 0) result += x;` + } +} diff --git a/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java b/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java index 7fdb010b..36767372 100644 --- a/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java +++ b/java/dfp/src/test/java/com/epam/deltix/dfp/JavaImplTest.java @@ -15,6 +15,7 @@ import java.util.Random; import static com.epam.deltix.dfp.JavaImpl.*; +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; import static com.epam.deltix.dfp.JavaImplCmp.MASK_BINARY_SIG2; import static 
com.epam.deltix.dfp.JavaImplCmp.MASK_BINARY_OR2; import static com.epam.deltix.dfp.TestUtils.*; @@ -898,4 +899,99 @@ public void unsignedReplacementTest() { assertEquals(sig_x > 9999999999999999L, UnsignedLong.isGreater(sig_x, 9999999999999999L)); assertEquals(sig_x < Long.MAX_VALUE, UnsignedLong.isLess(sig_x, Long.MAX_VALUE)); } + + @Test + public void div10Test() { + final long coefficient = Long.MAX_VALUE / FAST_DIV10_RECIPROCAL; // Critical point + assertTrue(coefficient > Integer.MAX_VALUE); + + final long r = coefficient / 10; + + long p = coefficient * FAST_DIV10_RECIPROCAL; + final long coefficient10 = p >> FAST_DIV10_SHIFT; + + assertEquals(r, coefficient10); + } + + @Test + public void mul0() { + final long[] testValues = new long[]{ + 0, 4, 5, 10, 1153, 1155, + Integer.MAX_VALUE - 11, Integer.MAX_VALUE - 1, + Integer.MAX_VALUE, 0x80000000L, + 0x80000001L, 0x80000007L, + Long.MAX_VALUE - 13, Long.MAX_VALUE - 1, + Long.MAX_VALUE, 0x8000000000000000L, + 0x8000000000000001L, 0x8000000000000011L, + 0xFFFFFFFFFFFFFFFCL, 0xFFFFFFFFFFFFFFFFL, + }; + + final boolean[] doOp = new boolean[]{false, true}; + + for (final long au : testValues) + for (final long bu : testValues) + for (final boolean negA : doOp) + for (final boolean negB : doOp) + for (final boolean shrA : doOp) + for (final boolean shrB : doOp) { + + final long a = prepareArg(au, negA, shrA); + final long b = prepareArg(bu, negB, shrB); + + final long w1, w0; + { + long __CX = a; + long __CY = b; + long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; + __CXH = __CX >>> 32; + __CXL = LONG_LOW_PART & __CX; + __CYH = __CY >>> 32; + __CYL = LONG_LOW_PART & __CY; + + __PM = __CXH * __CYL; + __PH = __CXH * __CYH; + __PL = __CXL * __CYL; + __PM2 = __CXL * __CYH; + __PH += (__PM >>> 32); + __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); + + w1 = __PH + (__PM >>> 32); + w0 = (__PM << 32) + (LONG_LOW_PART & __PL); + } + final BigInteger rOld = unsignedLongToBigInteger(w1).multiply(twoPow64).add(unsignedLongToBigInteger(w0)); + + final BigInteger ab = unsignedLongToBigInteger(a); + final BigInteger bb = unsignedLongToBigInteger(b); + final BigInteger rb = ab.multiply(bb); + + if (!rb.equals(rOld)) + throw new RuntimeException("The case " + a + " * " + b + " result " + rb + " != " + rOld); + + final long m1 = Mul64Impl.unsignedMultiplyHigh(a, b); + final long m0 = a * b; + + if (w1 != m1 || w0 != m0) + throw new RuntimeException("The case " + Long.toHexString(a) + " * " + Long.toHexString(b) + " result [" + + Long.toHexString(w1) + ", " + Long.toHexString(w0) + "] != [" + + Long.toHexString(m1) + ", " + Long.toHexString(m0) + "]"); + } + } + + private static long prepareArg(long x, final boolean negX, final boolean shrX) { + if (negX) + x = -x; + if (shrX) + x = x >>> 1; + return x; + } + + private static final BigInteger twoPow64 = unsignedLongToBigInteger(0x100000000L).multiply(unsignedLongToBigInteger(0x100000000L)); + + private static BigInteger unsignedLongToBigInteger(long x) { + final byte[] p = new byte[9]; + for (int i = 0; i < p.length; ++i, x = x >>> 8) + p[p.length - 1 - i] = (byte) (x & 0xFF); + + return new BigInteger(p); + } } diff --git a/java/dfpNativeTests/build.gradle b/java/dfpNativeTests/build.gradle index 510746a2..c7d3021c 100644 --- a/java/dfpNativeTests/build.gradle +++ b/java/dfpNativeTests/build.gradle @@ -1,13 +1,12 @@ plugins { - id "me.champeau.gradle.jmh" version "0.5.3" apply false + id "me.champeau.jmh" version "0.7.1" } apply plugin: 'java' -apply plugin: 'me.champeau.gradle.jmh' group = 
'com.epam.deltix' -sourceCompatibility = 1.8 +sourceCompatibility = 8 repositories { mavenCentral() @@ -41,7 +40,29 @@ task copyNativeDfpResources(type: Copy) { } compileJava.dependsOn(copyNativeDfpResources) -jmh { - include = '.*Benchmark.*' +task testsJar(type: Jar, dependsOn: [jar, testClasses, processTestResources]) { + archiveClassifier = 'tests' + from sourceSets.test.output } +task copyTestDeps(type: Copy) { + from(sourceSets.test.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, dependsOn: [testsJar, copyTestDeps]) { + from(jar.outputs.files) + from(testsJar.outputs.files) + into('testLibs') +} + +task runTestJars(type: JavaExec) { + mainClass = 'org.junit.runner.JUnitCore' + classpath = files { file('testLibs').listFiles() } + + def testClassesRoot = file('src/test/java').absolutePath + fileTree(dir: testClassesRoot, include: '**/*Test.java').each { File file -> + def ap = file.absolutePath + args += ap.substring(testClassesRoot.length() + 1, ap.length() - 5).replace(File.separator, '.') + } +} diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java index 8652a438..cea1d978 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/CanonizeBenchmark.java @@ -1,6 +1,7 @@ package com.epam.deltix.dfp; import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; @@ -8,6 +9,8 @@ import java.util.concurrent.TimeUnit; +import static com.epam.deltix.dfp.JavaImpl.*; + @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Warmup(time = 2, iterations = 4) @@ -15,18 +18,64 @@ @State(Scope.Thread) @Fork(3) public class CanonizeBenchmark { - /* 10., 1000_000., 123.456789123, 1.23, null(=NaN) */ - @Param({"3584865303386914826", "3584865303387914816", "3503800633551035011", "3566850904877432955", "-128"}) + /* 10., 1000_000., 123.456789123, 1.23, null(=NaN), 0., 1., 1000., 1000000., 1000000000. */ + @Param({"3584865303386914826", "3584865303387914816", "3503800633551035011", "3566850904877432955", "-128", + "3584865303386914816", "3584865303386914817", "3584865303386915816", "3584865303387914816", "3584865304386914816"}) private long decimalValue; @Benchmark - public long canonize() { - return Decimal64Utils.canonize(decimalValue); + public void canonize(Blackhole bh) { + bh.consume(Decimal64Utils.canonize(decimalValue)); + } + + @Benchmark + public void canonizeOrig(Blackhole bh) { + bh.consume(canonizeFiniteOrig(decimalValue)); + } + + public static long canonizeFiniteOrig(final long value) { + final long signMask = value & MASK_SIGN; + long coefficient; + int exponent; + + if (isSpecial(value)) { + assert (isFinite(value)); + + // Check for non-canonical values. + final long x = (value & LARGE_COEFFICIENT_MASK) | LARGE_COEFFICIENT_HIGH_BIT; + coefficient = x > MAX_COEFFICIENT ? 0 : x; + + // Extract exponent. + final long tmp = value >> EXPONENT_SHIFT_LARGE; + exponent = (int) (tmp & EXPONENT_MASK); + } else { + // Extract coefficient. + coefficient = (value & SMALL_COEFFICIENT_MASK); + + // Extract exponent. 
Maximum biased value for "small exponent" is 0x2FF(*2=0x5FE), signed: [] + // upper 1/4 of the mask range is "special", as checked in the code above + final long tmp = value >> EXPONENT_SHIFT_SMALL; + exponent = (int) (tmp & EXPONENT_MASK); + } + + if (coefficient == 0) + return ZERO; + + long div10 = coefficient / 10; + if (div10 * 10 != coefficient) + return value; + + do { + coefficient = div10; + div10 /= 10; + ++exponent; + } while (div10 * 10 == coefficient); + return pack(signMask, exponent, coefficient, BID_ROUNDING_TO_NEAREST); } public static void main(final String[] args) throws RunnerException { final Options opt = new OptionsBuilder() - .include(".*" + UnaryOperationBenchmark.class.getSimpleName() + ".*") + .include(".*" + CanonizeBenchmark.class.getSimpleName() + ".*") .forks(1) .build(); new Runner(opt).run(); diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java index d7923f1c..6cb857aa 100644 --- a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MathBenchmark.java @@ -9,6 +9,9 @@ import java.util.concurrent.TimeUnit; +import static com.epam.deltix.dfp.JavaImpl.*; +import static com.epam.deltix.dfp.JavaImpl.EXPONENT_SHIFT_SMALL; + @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Warmup(time = 3, iterations = 1) @@ -16,14 +19,18 @@ @State(Scope.Thread) public class MathBenchmark { private long[] decimalValues; + private double[] doubleValues; public static int fixedSeed = 42 * 42 * 42 * 42 * 42; @Setup public void setUp() { TestUtils.RandomDecimalsGenerator generator = new TestUtils.RandomDecimalsGenerator(fixedSeed); decimalValues = new long[1004]; - for (int i = 0; i < decimalValues.length; ++i) + doubleValues = new double[decimalValues.length]; + for (int i = 0; i < decimalValues.length; ++i) { decimalValues[i] = generator.nextX(); + doubleValues[i] = Decimal64Utils.toDouble(decimalValues[i]); + } } @Benchmark @@ -242,6 +249,59 @@ public void fdimOld(Blackhole bh) { // bh.consume(NativeImpl.bid64Fdim(decimalValues[i], Decimal64Utils.negate(decimalValues[i + 1]))); // } + @Benchmark + public void fromDecimalDouble(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(Decimal64Utils.fromDecimalDouble(doubleValues[i])); + } + + @Benchmark + public void fromDecimalDoubleOrig(Blackhole bh) { + for (int i = 0; i < 1000; ++i) + bh.consume(fromDecimalDoubleOrig(doubleValues[i])); + } + + public static long fromDecimalDoubleOrig(final double x) { + final long y = Decimal64Utils.fromDouble(x); + long m, signAndExp; + + // Odd + special encoding(16 digits) + final long notY = ~y; + if ((MASK_SPECIAL & notY) == 0) { + if ((MASK_INFINITY_AND_NAN & notY) == 0) + return y; + + m = (y & LARGE_COEFFICIENT_MASK) + LARGE_COEFFICIENT_HIGH_BIT; + signAndExp = ((y << 2) & EXPONENT_MASK_SMALL) + (y & MASK_SIGN); + } else { + m = y & SMALL_COEFFICIENT_MASK; + // 16 digits + odd + signAndExp = y & (-1L << EXPONENT_SHIFT_SMALL); + if (m <= MAX_COEFFICIENT / 10 + 1) + return y; + } + + if ((y & 1) == 0) + return y; + // NeedAdjustment + // Check the last digit + final long m1 = m + 1; + m = m1 / 10; + if (m1 - m * 10 > 2) + return y; + + signAndExp += 1L << EXPONENT_SHIFT_SMALL; + if (Decimal64Utils.toDouble(signAndExp + m) != x) + return y; + + for (long n = m; ; ) { + final long m10 = n / 10; + if (m10 * 10 != n) + return signAndExp + n; + n = m10; + signAndExp += 1L << 
EXPONENT_SHIFT_SMALL; + } + } public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() diff --git a/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java new file mode 100644 index 00000000..3c89a747 --- /dev/null +++ b/java/dfpNativeTests/src/jmh/java/com/epam/deltix/dfp/MulBenchmark.java @@ -0,0 +1,87 @@ +package com.epam.deltix.dfp; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.concurrent.TimeUnit; + +import static com.epam.deltix.dfp.JavaImpl.*; +import static com.epam.deltix.dfp.JavaImpl.EXPONENT_SHIFT_SMALL; +import static com.epam.deltix.dfp.JavaImplAdd.LONG_LOW_PART; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(time = 3, iterations = 1) +@Measurement(time = 3, iterations = 3) +@State(Scope.Thread) +public class MulBenchmark { + private long[] decimalValues; + private double[] doubleValues; + public static int fixedSeed = 42 * 42 * 42 * 42 * 42; + + @Setup + public void setUp() { + TestUtils.RandomDecimalsGenerator generator = new TestUtils.RandomDecimalsGenerator(fixedSeed); + decimalValues = new long[1004]; + doubleValues = new double[decimalValues.length]; + for (int i = 0; i < decimalValues.length; ++i) { + decimalValues[i] = generator.nextX(); + doubleValues[i] = Decimal64Utils.toDouble(decimalValues[i]); + } + } + + @Benchmark + public void mul0(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + final long __CX = decimalValues[i]; + final long __CY = decimalValues[i + 1]; + long __CXH, __CXL, __CYH, __CYL, __PL, __PH, __PM, __PM2; + __CXH = __CX >>> 32; + __CXL = LONG_LOW_PART & __CX; + __CYH = __CY >>> 32; + __CYL = LONG_LOW_PART & __CY; + + __PM = __CXH * __CYL; + __PH = __CXH * __CYH; + __PL = __CXL * __CYL; + __PM2 = __CXL * __CYH; + __PH += (__PM >>> 32); + __PM = (LONG_LOW_PART & __PM) + __PM2 + (__PL >>> 32); + + bh.consume(__PH + (__PM >>> 32)); + bh.consume((__PM << 32) + (LONG_LOW_PART & __PL)); + } + } + + @Benchmark + public void multiplyHigh(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + final long A = decimalValues[i]; + final long T = decimalValues[i + 1]; + bh.consume(Mul64Impl.multiplyHigh(A, T)); + bh.consume(A * T); + } + } + + @Benchmark + public void unsignedMultiplyHigh(Blackhole bh) { + for (int i = 0; i < 1000; ++i) { + final long A = decimalValues[i]; + final long T = decimalValues[i + 1]; + bh.consume(Mul64Impl.unsignedMultiplyHigh(A, T)); + bh.consume(A * T); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(".*" + MulBenchmark.class.getSimpleName() + ".*") + .forks(1) + .build(); + new Runner(opt).run(); + } +} diff --git a/java/systemInfo/build.gradle b/java/systemInfo/build.gradle index 71351d4c..d9f044cc 100644 --- a/java/systemInfo/build.gradle +++ b/java/systemInfo/build.gradle @@ -6,7 +6,7 @@ repositories { mavenCentral() } -sourceCompatibility = 1.7 +sourceCompatibility = 8 jar { manifest { diff --git a/java/vtaTest/build.gradle b/java/vtaTest/build.gradle index 0f5f8452..b1d55f4a 100644 --- a/java/vtaTest/build.gradle +++ b/java/vtaTest/build.gradle @@ -42,3 +42,27 @@ task testVta(dependsOn: compileJava, type: JavaExec) { jvmArgs += '-javaagent:' + 
vta_path + '=' + vta_config } test.dependsOn testVta + +task copyTestDeps(type: Copy) { + from(sourceSets.main.runtimeClasspath) { include '*.jar' } + into('testLibs') +} + +task copyTestJars(type: Copy, dependsOn: [jar, copyTestDeps]) { + from(jar.outputs.files) + into('testLibs') +} + +task runTestJars(type: JavaExec) { + mainClass = "com.example.demo.DemoApplication" + classpath = files { file('testLibs').listFiles() } + workingDir = "$projectDir" + + String vta_path = findDependency(".*value-types.*jar") + print "VTA path: " + vta_path + "\n" + + String vta_config = "$projectDir/value-types.json" + print "VTA config: " + vta_config + "\n" + + jvmArgs += '-javaagent:' + vta_path + '=' + vta_config +}
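
The change repeated throughout the JavaImplAdd/JavaImplMinMax/JavaImplMul/JavaImplParse/JavaImplRound hunks is the collapse of the expanded __mul_64x64_to_128 macro into two statements: Mul64Impl.unsignedMultiplyHigh(a, b) for the high 64 bits of the unsigned product and a plain a * b for the low 64 bits. The standalone sketch below (not part of the patch; it assumes a JDK 9+ compiler for Math.multiplyHigh) checks that identity against BigInteger and also checks the sign-correction formula used in the java9 variant of Mul64Impl above.

import java.math.BigInteger;

// Standalone check of the identity used by this patch: for unsigned 64-bit a and b,
// the 128-bit product is [unsignedMultiplyHigh(a, b), a * b].
public class Mul64Check {
    static final long LOW = 0xFFFFFFFFL;

    // Portable high word of the unsigned 64x64 product (the removed macro body).
    static long unsignedHighPortable(long x, long y) {
        long xh = x >>> 32, xl = x & LOW, yh = y >>> 32, yl = y & LOW;
        long pm = xh * yl, ph = xh * yh, pl = xl * yl, pm2 = xl * yh;
        ph += pm >>> 32;
        pm = (pm & LOW) + pm2 + (pl >>> 32);
        return ph + (pm >>> 32);
    }

    // Java 9+ form: signed Math.multiplyHigh plus corrections for negative inputs,
    // as in the java9 Mul64Impl; equivalent to Math.unsignedMultiplyHigh (JDK 18+).
    static long unsignedHighFromSigned(long x, long y) {
        return Math.multiplyHigh(x, y) + (y & (x >> 63)) + (x & (y >> 63));
    }

    public static void main(String[] args) {
        long[] samples = {0L, 10L, 0x80000001L, Long.MAX_VALUE,
                0x8000000000000011L, 0xFFFFFFFFFFFFFFFCL, -1L};
        for (long a : samples)
            for (long b : samples) {
                long hi = unsignedHighPortable(a, b);
                long lo = a * b; // low 64 bits: plain wrapping multiply
                if (hi != unsignedHighFromSigned(a, b))
                    throw new AssertionError("high-word mismatch for " + a + ", " + b);
                BigInteger expected = new BigInteger(Long.toUnsignedString(a))
                        .multiply(new BigInteger(Long.toUnsignedString(b)));
                BigInteger actual = new BigInteger(Long.toUnsignedString(hi)).shiftLeft(64)
                        .add(new BigInteger(Long.toUnsignedString(lo)));
                if (!expected.equals(actual))
                    throw new AssertionError("128-bit mismatch for " + a + ", " + b);
            }
        System.out.println("[hi, lo] == a * b as an unsigned 128-bit product for all samples");
    }
}

The java18 variant of Mul64Impl can delegate straight to Math.unsignedMultiplyHigh, which was added in JDK 18; the multi-release jar selects the matching class at run time.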
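
The copyTestJars/runTestJars tasks added across the modules package each module's jar, tests jar, and test runtime dependencies into testLibs/ and then launch org.junit.runner.JUnitCore with test class names derived from src/test/java, presumably so the prebuilt test jars can be executed on a JVM other than the one that built them. A minimal sketch of that launch, driving JUnit 4 programmatically (the class name is only an illustration taken from this diff; the Gradle task computes the real list):

import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;

// Run one JUnit 4 test class by name, the way the runTestJars tasks invoke JUnitCore
// against the jars copied into testLibs/.
public class RunTestJarsSketch {
    public static void main(String[] args) throws Exception {
        String className = args.length > 0 ? args[0] : "com.epam.deltix.dfp.JavaImplTest";
        Result result = JUnitCore.runClasses(Class.forName(className));
        for (Failure failure : result.getFailures())
            System.err.println(failure);
        System.out.printf("ran %d, failed %d%n", result.getRunCount(), result.getFailureCount());
        if (!result.wasSuccessful())
            System.exit(1);
    }
}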