forked from codeplea/ahocorasickphp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Issue codeplea#1 - update your utility
* Updates the code to use PSR-2 formatting (https://www.php-fig.org/psr/psr-2/). This requires some changes to formatting of class and method names. * Adds composer to the project. Currently only pulls in `phpunit` as a development requirement. * Adds `phpunit` tests created from the `example.php` code along with testing the various exceptions that can be thrown. * Some code polish based on static analysis.
- Loading branch information
traack_lcruz
committed
Oct 1, 2018
1 parent
64d696b
commit 34cb759
Showing
10 changed files
with
1,730 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/vendor/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
<?php | ||
|
||
/* | ||
* ahocorasick - fast string searching in php | ||
* | ||
* Copyright (c) 2017-2018 Lewis Van Winkle | ||
* | ||
* http://CodePlea.com | ||
* | ||
* This software is provided 'as-is', without any express or implied | ||
* warranty. In no event will the authors be held liable for any damages | ||
* arising from the use of this software. | ||
* | ||
* Permission is granted to anyone to use this software for any purpose, | ||
* including commercial applications, and to alter it and redistribute it | ||
* freely, subject to the following restrictions: | ||
* | ||
* 1. The origin of this software must not be misrepresented; you must not | ||
* claim that you wrote the original software. If you use this software | ||
* in a product, an acknowledgement in the product documentation would be | ||
* appreciated but is not required. | ||
* 2. Altered source versions must be plainly marked as such, and must not be | ||
* misrepresented as being the original software. | ||
* 3. This notice may not be removed or altered from any source distribution. | ||
* | ||
*/ | ||
|
||
|
||
|
||
/**
 * Aho-Corasick multi-keyword string matcher.
 *
 * Usage: call addNeedle() once per keyword, then finalize() once, then
 * search() as many times as needed.
 *
 * Matching is byte-wise (strlen() / string offsets), so the offsets returned
 * by search() are byte offsets into the haystack.
 *
 * Implementation note: transitions, failure links and outputs are kept in
 * three separate arrays indexed by state id. They must not share one array
 * per state, because PHP casts numeric string keys such as "0" and "1" to
 * integers; a byte transition on the character '0' or '1' would otherwise
 * collide with any integer key reserved for bookkeeping.
 */
class AhoCorasick
{
    /**
     * Goto function: $transitions[$state][$byte] => id of the next state.
     * State 0 is the root of the trie.
     *
     * @var array[]
     */
    private $transitions = [[]];

    /**
     * Failure links: $failure[$state] => state to fall back to on a mismatch.
     * Every state starts with a link to the root; finalize() computes the
     * real links.
     *
     * @var int[]
     */
    private $failure = [0];

    /**
     * Output function: $outputs[$state] => needles that end at this state.
     * After finalize() this also contains the needles inherited through the
     * failure link (i.e. needles that are proper suffixes of the state's path).
     *
     * @var string[][]
     */
    private $outputs = [[]];

    /**
     * True once finalize() has run; no more needles may be added after that.
     *
     * @var bool
     */
    private $final = false;

    /**
     * Add a keyword to search for.
     *
     * An empty needle is ignored: it has no byte to anchor a match on.
     * Adding the same needle twice makes search() report it twice.
     *
     * @param string $needle
     * @throws Exception if finalize() has already been called
     */
    public function addNeedle(string $needle)
    {
        if ($this->final) {
            throw new Exception('Cannot add word to finalized ahocorasick.');
        }

        $length = strlen($needle);
        if ($length === 0) {
            return;
        }

        // Walk the trie from the root, creating states for unseen bytes.
        $state = 0;
        for ($i = 0; $i < $length; ++$i) {
            $byte = $needle[$i];

            if (!isset($this->transitions[$state][$byte])) {
                // The new state's id is the current number of states.
                $this->transitions[$state][$byte] = count($this->transitions);
                $this->transitions[] = [];
                $this->failure[] = 0;
                $this->outputs[] = [];
            }

            $state = $this->transitions[$state][$byte];
        }

        $this->outputs[$state][] = $needle;
    }

    /**
     * Create the structure needed to search text for the given keywords.
     * Once you call this, you cannot add additional keywords via addNeedle().
     *
     * Calling it again is a no-op, so suffix outputs are never merged twice.
     */
    public function finalize()
    {
        if ($this->final) {
            return;
        }

        // Breadth-first traversal. The direct children of the root keep their
        // initial failure link (the root), so they only need to be enqueued.
        // A head index is used instead of shifting the array, which keeps the
        // traversal linear in the number of states.
        $queue = array_values($this->transitions[0]);
        $head = 0;

        while ($head < count($queue)) {
            $state = $queue[$head++];

            foreach ($this->transitions[$state] as $byte => $child) {
                // Follow the parent's failure chain until a state with a
                // transition on $byte is found, or the root is reached.
                $fallback = $this->failure[$state];
                while ($fallback > 0 && !isset($this->transitions[$fallback][$byte])) {
                    $fallback = $this->failure[$fallback];
                }

                $link = $this->transitions[$fallback][$byte] ?? 0;
                $this->failure[$child] = $link;

                // $link is shallower than $child, so its outputs are already
                // complete; inherit them after the child's own needles.
                if ($this->outputs[$link] !== []) {
                    $this->outputs[$child] = array_merge($this->outputs[$child], $this->outputs[$link]);
                }

                $queue[] = $child;
            }
        }

        $this->final = true;
    }

    /**
     * Search the text for the given keywords.
     *
     * @param string $haystack
     * @return array list of [needle, offset] pairs in the order the matches
     *               end in the haystack; offset is the byte position where
     *               the needle starts
     * @throws Exception if finalize() has not been called yet
     */
    public function search(string $haystack): array
    {
        if (!$this->final) {
            throw new Exception('Must call finalize() before search.');
        }

        $found = [];
        $state = 0;
        $length = strlen($haystack);

        for ($i = 0; $i < $length; ++$i) {
            $byte = $haystack[$i];

            // On a mismatch fall back along the failure links.
            while ($state > 0 && !isset($this->transitions[$state][$byte])) {
                $state = $this->failure[$state];
            }

            // Either a transition exists, or we are at the root and stay there.
            $state = $this->transitions[$state][$byte] ?? 0;

            foreach ($this->outputs[$state] as $needle) {
                $found[] = [$needle, $i - strlen($needle) + 1];
            }
        }

        return $found;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"name": "codeplea/ahocorasickphp", | ||
"description": "Aho-Corasick multi-keyword string searching library in PHP.", | ||
"type": "library", | ||
"config": { | ||
"sort-packages": true | ||
}, | ||
"require": { | ||
"php": ">=7.0" | ||
}, | ||
"require-dev": { | ||
"phpunit/phpunit": "^7.3" | ||
}, | ||
"license": "zlib" | ||
} |
Oops, something went wrong.