From 9249957dac013011c7e3c4f0f9db867dfaf62688 Mon Sep 17 00:00:00 2001 From: jvandreae <213101790+jvandreae@users.noreply.github.com> Date: Sun, 1 Jun 2025 10:56:46 -0700 Subject: [PATCH] Optimize computeScore Script: ```php getInt(0, strlen($chars) - 1)]; } $arr[] = [ 'key' => $key, ]; } $fuse = new \Fuse\Fuse($arr, [ 'isCaseSensitive' => true, 'includeScore' => true, 'minMatchCharLength' => 2, 'shouldSort' => true, 'keys' => ['key'], 'threshold' => 0.25, ]); if (function_exists('spx_profiler_start')) { spx_profiler_start(); } try { $start = microtime(true); $results = $fuse->search('test'); $end = microtime(true); printf("%g\n", $end-$start); } finally { if (function_exists('spx_profiler_stop')) { spx_profiler_stop(); } } ``` Before: ```console $ SPX_BUILTINs=1 SPX_ENABLED=1 SPX_REPORT=flat SPX_FP_LIVE=1 SPX_AUTO_START=0 php -d opcache.enable_cli=1 test.php *** SPX Report *** Global stats: Called functions : 2.0M Distinct functions : 27 Wall time : 2.76s ZE memory usage : 59.2KB Flat profile: Wall time | ZE memory usage | Inc. | *Exc. | Inc. | Exc. 
| Called | Function ----------+----------+----------+----------+----------+---------- 2.57s | 1.89s | 0B | 525.1MB | 100.0K | Fuse\Search\Bitap\search 484.0ms | 484.0ms | -537.9MB | -537.9MB | 1.5M | Fuse\Search\Bitap\computeScore 197.0ms | 197.0ms | 12.8MB | 12.8MB | 100.0K | Fuse\Search\Bitap\convertMaskToIndices 649.7us | 630.4us | 1.3KB | 1.3KB | 4 | 1@Composer\Autoload\{closure} 44.1us | 40.2us | 15.1KB | 15.1KB | 41 | Fuse\Core\{closure} 33.7us | 33.7us | 432B | 432B | 4 | Composer\Autoload\ClassLoader::findFileWithExtension 14.2us | 14.2us | 0B | 0B | 139 | Fuse\Helpers\sort 53.0us | 8.9us | 16.0KB | 960B | 1 | Fuse\Core\format 41.4us | 6.8us | 336B | 0B | 1 | /home/john/Code/Fuse/src/Search/Bitap/BitapSearch.php 441.5us | 4.0us | 2.8KB | 144B | 1 | Fuse\Core\Register::createSearcher $ php -d opcache.enable_cli=1 -d opcache.jit=1255 -d opcache.jit_buffer_size=200M -d memory_limit=-1 test.php 0.837517 ``` After: ```console $ SPX_BUILTINs=1 SPX_ENABLED=1 SPX_REPORT=flat SPX_FP_LIVE=1 SPX_AUTO_START=0 php -d opcache.enable_cli=1 test.php *** SPX Report *** Global stats: Called functions : 2.0M Distinct functions : 27 Wall time : 2.41s ZE memory usage : 59.2KB Flat profile: Wall time | ZE memory usage | Inc. | *Exc. | Inc. | Exc. 
| Called | Function ----------+----------+----------+----------+----------+---------- 2.23s | 1.79s | 0B | -12.8MB | 100.0K | Fuse\Search\Bitap\search 247.1ms | 247.1ms | 0B | 0B | 1.5M | Fuse\Search\Bitap\computeScore 197.0ms | 197.0ms | 12.8MB | 12.8MB | 100.0K | Fuse\Search\Bitap\convertMaskToIndices 661.9us | 639.6us | 1.3KB | 1.3KB | 4 | 1@Composer\Autoload\{closure} 32.1us | 32.1us | 432B | 432B | 4 | Composer\Autoload\ClassLoader::findFileWithExtension 33.4us | 29.8us | 15.1KB | 15.1KB | 41 | Fuse\Core\{closure} 14.2us | 14.2us | 0B | 0B | 139 | Fuse\Helpers\sort 44.0us | 10.3us | 336B | 0B | 1 | /home/john/Code/Fuse/src/Search/Bitap/BitapSearch.php 40.7us | 7.3us | 16.0KB | 960B | 1 | Fuse\Core\format 446.0us | 3.9us | 2.8KB | 144B | 1 | Fuse\Core\Register::createSearcher $ php -d opcache.enable_cli=1 -d opcache.jit=1255 -d opcache.jit_buffer_size=200M -d memory_limit=-1 test.php 0.737639 ``` --- src/Search/Bitap/computeScore.php | 31 +++++++++++++++++++------------ src/Search/Bitap/search.php | 31 ++++--------------------------- 2 files changed, 23 insertions(+), 39 deletions(-) diff --git a/src/Search/Bitap/computeScore.php b/src/Search/Bitap/computeScore.php index 5adc874..ebaa308 100644 --- a/src/Search/Bitap/computeScore.php +++ b/src/Search/Bitap/computeScore.php @@ -2,16 +2,23 @@ namespace Fuse\Search\Bitap; -use function Fuse\Core\config; - -function computeScore(string $pattern, array $options = []) -{ - $errors = $options['errors'] ?? 0; - $currentLocation = $options['currentLocation'] ?? 0; - $expectedLocation = $options['expectedLocation'] ?? 0; - $distance = $options['distance'] ?? config('distance'); - $ignoreLocation = $options['ignoreLocation'] ?? 
config('ignoreLocation'); - +/** + * @param string $pattern + * @param int $errors Error count; divided by the pattern length to form the accuracy term + * @param int $currentLocation + * @param int $expectedLocation + * @param int $distance Divisor applied to the location offset; 0 is special-cased to avoid dividing by zero + * @param bool $ignoreLocation + * @return float + */ +function computeScore( + $pattern, + $errors, + $currentLocation, + $expectedLocation, + $distance, + $ignoreLocation +) { $accuracy = $errors / mb_strlen($pattern); if ($ignoreLocation) { @@ -20,9 +27,9 @@ function computeScore(string $pattern, array $options = []) $proximity = abs($expectedLocation - $currentLocation); - if (!$distance) { + if ($distance === 0) { // Dodge divide by zero error. - return $proximity ? 1.0 : $accuracy; + return $proximity === 0 ? $accuracy : 1.0; } return $accuracy + $proximity / $distance; diff --git a/src/Search/Bitap/search.php b/src/Search/Bitap/search.php index 2b09d8e..ac00b3b 100644 --- a/src/Search/Bitap/search.php +++ b/src/Search/Bitap/search.php @@ -49,12 +49,7 @@ function search(string $text, string $pattern, array $patternAlphabet, array $op // Get all exact matches, here for speed up while (($index = mb_strpos($text, $pattern, $bestLocation)) !== false) { - $score = computeScore($pattern, [ - 'currentLocation' => $index, - 'expectedLocation' => $expectedLocation, - 'distance' => $distance, - 'ignoreLocation' => $ignoreLocation, - ]); + $score = computeScore($pattern, 0, $index, $expectedLocation, $distance, $ignoreLocation); $currentThreshold = min($score, $currentThreshold); $bestLocation = $index + $patternLen; @@ -85,13 +80,7 @@ function search(string $text, string $pattern, array $patternAlphabet, array $op $binMid = $binMax; while ($binMin < $binMid) { - $score = computeScore($pattern, [ - 'errors' => $i, - 'currentLocation' => $expectedLocation + $binMid, - 'expectedLocation' => $expectedLocation, - 'distance' => $distance, - 'ignoreLocation' => $ignoreLocation, - ]); + $score = computeScore($pattern, $i, $expectedLocation + $binMid, $expectedLocation, $distance, $ignoreLocation); if 
($score <= $currentThreshold) { $binMin = $binMid; @@ -136,13 +125,7 @@ function search(string $text, string $pattern, array $patternAlphabet, array $op } if ($bitArr[$j] & $mask) { - $finalScore = computeScore($pattern, [ - 'errors' => $i, - 'currentLocation' => $currentLocation, - 'expectedLocation' => $expectedLocation, - 'distance' => $distance, - 'ignoreLocation' => $ignoreLocation, - ]); + $finalScore = computeScore($pattern, $i, $currentLocation, $expectedLocation, $distance, $ignoreLocation); // This match will almost certainly be better than any existing match. // But check anyway. @@ -163,13 +146,7 @@ function search(string $text, string $pattern, array $patternAlphabet, array $op } // No hope for a (better) match at greater error levels. - $score = computeScore($pattern, [ - 'errors' => $i + 1, - 'currentLocation' => $expectedLocation, - 'expectedLocation' => $expectedLocation, - 'distance' => $distance, - 'ignoreLocation' => $ignoreLocation, - ]); + $score = computeScore($pattern, $i + 1, $expectedLocation, $expectedLocation, $distance, $ignoreLocation); if ($score > $currentThreshold) { break;