From 9a33080aa7b94294b93be7075cadf761dfb26f20 Mon Sep 17 00:00:00 2001 From: Roman Parpalak Date: Wed, 15 Nov 2023 17:35:10 +0200 Subject: [PATCH] Adopted refactoring in ArrayStorage and fixed tests. --- src/S2/Rose/Indexer.php | 12 +-- src/S2/Rose/Storage/ArrayFulltextStorage.php | 38 ++++++-- src/S2/Rose/Storage/ArrayStorage.php | 83 +++++----------- src/S2/Rose/Storage/Database/PdoStorage.php | 7 +- .../Storage/File/SingleFileArrayStorage.php | 86 ++++++---------- .../Rose/Storage/FulltextProxyInterface.php | 7 +- tests/unit/Rose/FinderTest.php | 53 +++++----- tests/unit/Rose/Storage/PdoStorageTest.php | 97 +++++++++---------- .../Storage/SingleFileArrayStorageTest.php | 23 +++-- 9 files changed, 187 insertions(+), 219 deletions(-) diff --git a/src/S2/Rose/Indexer.php b/src/S2/Rose/Indexer.php index af53297..6f0f3c4 100644 --- a/src/S2/Rose/Indexer.php +++ b/src/S2/Rose/Indexer.php @@ -79,21 +79,21 @@ protected static function arrayFromStr(string $contents): array protected function addToIndex(ExternalId $externalId, string $title, ContentWithMetadata $content, string $keywords): void { $sentenceCollection = $content->getSentenceMap()->toSentenceCollection(); - $words = $sentenceCollection->getWordsArray(); - $words = array_merge($words, self::arrayFromStr(str_replace(', ', ' ', $keywords))); + $contentWords = $sentenceCollection->getWordsArray(); + $contentWords = array_merge($contentWords, self::arrayFromStr(str_replace(', ', ' ', $keywords))); // TODO not to merge - foreach ($words as $i => $word) { + foreach ($contentWords as $i => $word) { if ($this->storage->isExcludedWord($word)) { - unset($words[$i]); + unset($contentWords[$i]); } } - $this->storage->addMetadata($externalId, \count($words), $content->getImageCollection()); + $this->storage->addMetadata($externalId, \count($contentWords), $content->getImageCollection()); $this->storage->addSnippets($externalId, ...$sentenceCollection->getSnippetSources()); $this->storage->addToFulltextIndex( $this->getStemsWithComponents(self::arrayFromStr($title)), $this->getStemsWithComponents(self::arrayFromStr($keywords)), // TODO consider different semantics of space and comma? - $this->getStemsWithComponents($words), + $this->getStemsWithComponents($contentWords), $externalId ); } diff --git a/src/S2/Rose/Storage/ArrayFulltextStorage.php b/src/S2/Rose/Storage/ArrayFulltextStorage.php index 34d233e..c438810 100644 --- a/src/S2/Rose/Storage/ArrayFulltextStorage.php +++ b/src/S2/Rose/Storage/ArrayFulltextStorage.php @@ -8,6 +8,9 @@ class ArrayFulltextStorage implements FulltextProxyInterface { + public const PREFIX_KEYWORD = 'K'; + public const PREFIX_TITLE = 'T'; + /** * @var array|string[][] */ @@ -37,11 +40,17 @@ public function getByWord(string $word): array $result = []; foreach ($this->fulltextIndex[$word] as $id => $entries) { if (\is_int($entries)) { - $result[$id][] = $entries; + $result[$id][self::TYPE_CONTENT][] = $entries; } else { $entries = explode('|', $entries); foreach ($entries as $position) { - $result[$id][] = base_convert($position, 36, 10); + if ($position[0] === self::PREFIX_TITLE) { + $result[$id][self::TYPE_TITLE][] = base_convert(substr($position, 1), 36, 10); + } elseif ($position[0] === self::PREFIX_KEYWORD) { + $result[$id][self::TYPE_KEYWORD][] = base_convert(substr($position, 1), 36, 10); + } else { + $result[$id][self::TYPE_CONTENT][] = base_convert($position, 36, 10); + } } } } @@ -64,25 +73,38 @@ public function countByWord(string $word): int /** * {@inheritdoc} */ - public function addWord(string $word, int $id, int $position): void + public function addWord(string $word, int $id, int $type, int $position): void { - $word = (string)$word; if ($word === '') { return; } if (isset($this->fulltextIndex[$word][$id])) { + $positionStr = base_convert($position, 10, 36); + if ($type === self::TYPE_KEYWORD) { + $positionStr = self::PREFIX_KEYWORD . $positionStr; + } elseif ($type === self::TYPE_TITLE) { + $positionStr = self::PREFIX_TITLE . $positionStr; + } + $value = $this->fulltextIndex[$word][$id]; if (\is_int($value)) { - // There was the only one position, but it's no longer the case. + // There was the only one content position, but it's no longer the case. // Convert to the 36-based number system. - $this->fulltextIndex[$word][$id] = base_convert($value, 10, 36) . '|' . base_convert($position, 10, 36); + $this->fulltextIndex[$word][$id] = base_convert($value, 10, 36) . '|' . $positionStr; } else { // Appending - $this->fulltextIndex[$word][$id] = $value . '|' . base_convert($position, 10, 36); + $this->fulltextIndex[$word][$id] = $value . '|' . $positionStr; } } else { - // If there is the only one position in index, the position is stored as decimal number + // If there is the only one content position in index, the position is stored as decimal number + if ($type === self::TYPE_KEYWORD) { + /** @noinspection CallableParameterUseCaseInTypeContextInspection */ + $position = self::PREFIX_KEYWORD . base_convert($position, 10, 36); + } elseif ($type === self::TYPE_TITLE) { + /** @noinspection CallableParameterUseCaseInTypeContextInspection */ + $position = self::PREFIX_TITLE . base_convert($position, 10, 36); + } $this->fulltextIndex[$word][$id] = $position; } } diff --git a/src/S2/Rose/Storage/ArrayStorage.php b/src/S2/Rose/Storage/ArrayStorage.php index b6ed4b8..6051c0c 100644 --- a/src/S2/Rose/Storage/ArrayStorage.php +++ b/src/S2/Rose/Storage/ArrayStorage.php @@ -1,6 +1,6 @@ fulltextProxy->getByWord($word); - foreach ($data as $id => $positions) { + foreach ($data as $id => $positionsByType) { $externalId = $this->externalIdFromInternalId($id); if ($externalId === null) { continue; } if ($instanceId === null || $externalId->getInstanceId() === $instanceId) { - $result->add($word, $externalId, $positions, isset($this->metadata[$id]) ? $this->metadata[$id]['wordCount'] : 0); + $result->add($word, new FulltextIndexPositionBag( + $externalId, + $positionsByType[FulltextProxyInterface::TYPE_TITLE] ?? [], + $positionsByType[FulltextProxyInterface::TYPE_KEYWORD] ?? [], + $positionsByType[FulltextProxyInterface::TYPE_CONTENT] ?? [], + isset($this->metadata[$id]) ? $this->metadata[$id]['wordCount'] : 0 + )); } } } @@ -84,6 +64,7 @@ public function fulltextResultByWords(array $words, ?int $instanceId): FulltextI /** * {@inheritdoc} + * @throws UnknownIdException */ public function getSnippets(SnippetQuery $snippetQuery): SnippetResult { @@ -108,12 +89,17 @@ public function getSnippets(SnippetQuery $snippetQuery): SnippetResult * {@inheritdoc} * @throws UnknownIdException */ - public function addToFulltextIndex(array $contentWords, array $titleWords, array $keywords, ExternalId $externalId): void + public function addToFulltextIndex(array $titleWords, array $keywords, array $contentWords, ExternalId $externalId): void { - // TODO $id = $this->internalIdFromExternalId($externalId); + foreach ($titleWords as $position => $word) { + $this->fulltextProxy->addWord($word, $id, FulltextProxyInterface::TYPE_TITLE, (int)$position); + } + foreach ($keywords as $position => $word) { + $this->fulltextProxy->addWord($word, $id, FulltextProxyInterface::TYPE_KEYWORD, (int)$position); + } foreach ($contentWords as $position => $word) { - $this->fulltextProxy->addWord($word, $id, (int)$position); + $this->fulltextProxy->addWord($word, $id, FulltextProxyInterface::TYPE_CONTENT, (int)$position); } } @@ -149,33 +135,12 @@ public function removeFromIndex(ExternalId $externalId): void $this->fulltextProxy->removeById($internalId); - foreach ($this->indexSingleKeywords as &$data) { + foreach ($this->metadata as &$data) { if (isset($data[$internalId])) { unset($data[$internalId]); } } unset($data); - - foreach ($this->indexBaseKeywords as &$data2) { - if (isset($data2[$internalId])) { - unset($data2[$internalId]); - } - } - unset($data2); - - foreach ($this->indexMultiKeywords as &$data3) { - if (isset($data3[$internalId])) { - unset($data3[$internalId]); - } - } - unset($data3); - - foreach ($this->metadata as &$data4) { - if (isset($data4[$internalId])) { - unset($data4[$internalId]); - } - } - unset($data4); } /** @@ -249,7 +214,7 @@ public function getTocByExternalId(ExternalId $externalId): ?TocEntry { $serializedExtId = $externalId->toString(); - return isset($this->toc[$serializedExtId]) ? $this->toc[$serializedExtId] : null; + return $this->toc[$serializedExtId] ?? null; } /** diff --git a/src/S2/Rose/Storage/Database/PdoStorage.php b/src/S2/Rose/Storage/Database/PdoStorage.php index 57d74f8..4de9983 100644 --- a/src/S2/Rose/Storage/Database/PdoStorage.php +++ b/src/S2/Rose/Storage/Database/PdoStorage.php @@ -112,12 +112,13 @@ public function getSnippets(SnippetQuery $snippetQuery): SnippetResult */ public function addToFulltextIndex(array $titleWords, array $keywords, array $contentWords, ExternalId $externalId): void { - if (empty($contentWords)) { + $allWords = array_merge(array_values($contentWords), array_values($titleWords), array_values($keywords)); + if (\count($allWords) === 0) { return; } $internalId = $this->getInternalIdFromExternalId($externalId); - $wordIds = $this->getWordIds(array_merge(array_values($contentWords), array_values($titleWords), array_values($keywords))); + $wordIds = $this->getWordIds($allWords); /** * @see \S2\Rose\Entity\WordPositionContainer::compareArrays for sorting requirement @@ -292,7 +293,7 @@ public function getSimilar(ExternalId $externalId, bool $includeFormatting, ?int $row['snippet2'] = ''; } // TODO take into account format_id of these snippets - $row['snippet'] = $includeFormatting ? StringHelper::convertInternalFormattingToHtml($row['snippet']) : StringHelper::clearInternalFormatting($row['snippet']); + $row['snippet'] = $includeFormatting ? StringHelper::convertInternalFormattingToHtml($row['snippet']) : StringHelper::clearInternalFormatting($row['snippet']); $row['snippet2'] = $includeFormatting ? StringHelper::convertInternalFormattingToHtml($row['snippet2']) : StringHelper::clearInternalFormatting($row['snippet2']); } diff --git a/src/S2/Rose/Storage/File/SingleFileArrayStorage.php b/src/S2/Rose/Storage/File/SingleFileArrayStorage.php index 63a3724..58192d1 100644 --- a/src/S2/Rose/Storage/File/SingleFileArrayStorage.php +++ b/src/S2/Rose/Storage/File/SingleFileArrayStorage.php @@ -1,27 +1,25 @@ fulltextProxy = new ArrayFulltextStorage(); } - /** - * @param bool $isDebug - * - * @return array - */ - public function load($isDebug = false) + public function load(bool $isDebug = false): array { $return = []; - if (count($this->toc)) { + if (\count($this->toc)) { return $return; } @@ -55,40 +48,32 @@ public function load($isDebug = false) $return[] = ProfileHelper::getProfilePoint('Reading index file', -$start_time + ($start_time = microtime(true))); } - $end = strpos($data, "\n"); - $my_data = substr($data, 8, $end); - $data = substr($data, $end + 1); - $this->fulltextProxy->setFulltextIndex(unserialize($my_data) ?: []); + $end = strpos($data, "\n"); + $myData = substr($data, 8, $end); + $data = substr($data, $end + 1); + $unserializeOptions = ['allowed_classes' => [ + \DateTime::class, + TocEntry::class, + Img::class, + ImgCollection::class, + SnippetSource::class, + ]]; + $this->fulltextProxy->setFulltextIndex(unserialize($myData, $unserializeOptions) ?: []); $end = strpos($data, "\n"); - $my_data = substr($data, 8, $end); + $myData = substr($data, 8, $end); $data = substr($data, $end + 1); - $this->excludedWords = unserialize($my_data) ?: []; - - $end = strpos($data, "\n"); - $my_data = substr($data, 8, $end); - $data = substr($data, $end + 1); - $this->indexSingleKeywords = unserialize($my_data) ?: []; - - $end = strpos($data, "\n"); - $my_data = substr($data, 8, $end); - $data = substr($data, $end + 1); - $this->indexBaseKeywords = unserialize($my_data) ?: []; - - $end = strpos($data, "\n"); - $my_data = substr($data, 8, $end); - $data = substr($data, $end + 1); - $this->indexMultiKeywords = unserialize($my_data) ?: []; + $this->excludedWords = unserialize($myData, $unserializeOptions) ?: []; $end = strpos($data, "\n"); - $my_data = substr($data, 8, $end); + $myData = substr($data, 8, $end); $data = substr($data, $end + 1); - $this->metadata = unserialize($my_data) ?: []; + $this->metadata = unserialize($myData, $unserializeOptions) ?: []; - $end = strpos($data, "\n"); - $my_data = substr($data, 8, $end); + $end = strpos($data, "\n"); + $myData = substr($data, 8, $end); // $data = substr($data, $end + 1); - $this->toc = unserialize($my_data) ?: []; + $this->toc = unserialize($myData, $unserializeOptions) ?: []; if ($isDebug) { @@ -103,15 +88,15 @@ public function load($isDebug = false) return $return; } - public function save() + public function save(): void { @unlink($this->filename); - file_put_contents($this->filename, 'fulltextProxy->getFulltextIndex()) . ':{'); + file_put_contents($this->filename, 'fulltextProxy->getFulltextIndex()) . ':{'); $buffer = ''; $length = 0; foreach ($this->fulltextProxy->getFulltextIndex() as $word => $data) { $chunk = serialize($word) . serialize($data); - $length += strlen($chunk); + $length += \strlen($chunk); $buffer .= $chunk; if ($length > 100000) { file_put_contents($this->filename, $buffer, FILE_APPEND); @@ -125,15 +110,6 @@ public function save() file_put_contents($this->filename, ' //' . serialize($this->excludedWords) . "\n", FILE_APPEND); $this->excludedWords = []; - file_put_contents($this->filename, ' //' . serialize($this->indexSingleKeywords) . "\n", FILE_APPEND); - $this->indexSingleKeywords = []; - - file_put_contents($this->filename, ' //' . serialize($this->indexBaseKeywords) . "\n", FILE_APPEND); - $this->indexBaseKeywords = []; - - file_put_contents($this->filename, ' //' . serialize($this->indexMultiKeywords) . "\n", FILE_APPEND); - $this->indexMultiKeywords = []; - file_put_contents($this->filename, ' //' . serialize($this->metadata) . "\n", FILE_APPEND); $this->metadata = []; diff --git a/src/S2/Rose/Storage/FulltextProxyInterface.php b/src/S2/Rose/Storage/FulltextProxyInterface.php index e918672..9c15570 100644 --- a/src/S2/Rose/Storage/FulltextProxyInterface.php +++ b/src/S2/Rose/Storage/FulltextProxyInterface.php @@ -8,14 +8,17 @@ interface FulltextProxyInterface { + public const TYPE_TITLE = 1; + public const TYPE_KEYWORD = 2; + public const TYPE_CONTENT = 3; /** - * @return array[] + * @return array[][] */ public function getByWord(string $word): array; public function countByWord(string $word): int; - public function addWord(string $word, int $id, int $position): void; + public function addWord(string $word, int $id, int $type, int $position): void; public function removeWord(string $word): void; diff --git a/tests/unit/Rose/FinderTest.php b/tests/unit/Rose/FinderTest.php index aa4e898..fe9cb64 100644 --- a/tests/unit/Rose/FinderTest.php +++ b/tests/unit/Rose/FinderTest.php @@ -20,6 +20,7 @@ use S2\Rose\Storage\Dto\SnippetQuery; use S2\Rose\Storage\Dto\SnippetResult; use S2\Rose\Storage\FulltextIndexContent; +use S2\Rose\Storage\FulltextIndexPositionBag; use S2\Rose\Storage\StorageReadInterface; /** @@ -39,37 +40,37 @@ public function testIgnoreFrequentWordsInFulltext(): void $result = new FulltextIndexContent(); foreach ($words as $k => $word) { if ($word === 'find') { - $result->add($word, new ExternalId('id_3'), [], [], [1]); - $result->add($word, new ExternalId('id_2'), [], [1], [10, 20]); - $result->add($word, new ExternalId('id_1'), [1], [], []); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_3'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_2'), [], [1], [10, 20], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_1'), [1], [], [], 0)); } if ($word === 'and') { - $result->add($word, new ExternalId('id_1'), [], [], [4, 8]); - $result->add($word, new ExternalId('id_2'), [], [], [7, 11, 34]); - $result->add($word, new ExternalId('id_3'), [], [], [28, 65]); - $result->add($word, new ExternalId('id_4'), [], [], [45, 9]); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_1'), [], [], [4, 8], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_2'), [], [], [7, 11, 34], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_3'), [], [], [28, 65], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_4'), [], [], [45, 9], 0)); - $result->add($word, new ExternalId('id_5'), [], [], [1]); - $result->add($word, new ExternalId('id_6'), [], [], [1]); - $result->add($word, new ExternalId('id_7'), [], [], [1]); - $result->add($word, new ExternalId('id_8'), [], [], [1]); - $result->add($word, new ExternalId('id_9'), [], [], [1]); - $result->add($word, new ExternalId('id_10'), [], [], [1]); - $result->add($word, new ExternalId('id_11'), [], [], [1]); - $result->add($word, new ExternalId('id_12'), [], [], [1]); - $result->add($word, new ExternalId('id_13'), [], [], [1]); - $result->add($word, new ExternalId('id_14'), [], [], [1]); - $result->add($word, new ExternalId('id_15'), [], [], [1]); - $result->add($word, new ExternalId('id_16'), [], [], [1]); - $result->add($word, new ExternalId('id_17'), [], [], [1]); - $result->add($word, new ExternalId('id_18'), [], [], [1]); - $result->add($word, new ExternalId('id_19'), [], [], [1]); - $result->add($word, new ExternalId('id_20'), [], [], [1]); - $result->add($word, new ExternalId('id_21'), [], [], [1]); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_5'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_6'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_7'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_8'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_9'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_10'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_11'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_12'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_13'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_14'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_15'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_16'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_17'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_18'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_19'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_20'), [], [], [1], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_21'), [], [], [1], 0)); } if ($word === 'replace') { - $result->add($word, new ExternalId('id_2'), [], [], [12]); - $result->add($word, new ExternalId('id_1'), [1], [], []); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_2'), [], [], [12], 0)); + $result->add($word, new FulltextIndexPositionBag(new ExternalId('id_1'), [1], [], [], 0)); } unset($words[$k]); diff --git a/tests/unit/Rose/Storage/PdoStorageTest.php b/tests/unit/Rose/Storage/PdoStorageTest.php index 27e9884..bdd04b1 100644 --- a/tests/unit/Rose/Storage/PdoStorageTest.php +++ b/tests/unit/Rose/Storage/PdoStorageTest.php @@ -18,6 +18,7 @@ use S2\Rose\Storage\Database\AbstractRepository; use S2\Rose\Storage\Database\PdoStorage; use S2\Rose\Storage\Exception\EmptyIndexException; +use S2\Rose\Storage\FulltextIndexPositionBag; /** * @group storage @@ -65,8 +66,8 @@ public function testStorage() $tocEntry2 = new TocEntry('', '', new \DateTime('2014-05-28'), '', 1, 'pokjhgtyuio'); $storage->addEntryToToc($tocEntry2, $externalId2); - $storage->addToFulltextIndex([1 => 'word1', 2 => 'word2'], $externalId1); - $storage->addToFulltextIndex([1 => 'word2', 10 => 'word2'], $externalId2); + $storage->addToFulltextIndex([], [], [1 => 'word1', 2 => 'word2'], $externalId1); + $storage->addToFulltextIndex([], [], [1 => 'word2', 10 => 'word2'], $externalId2); $stat = $storage->getIndexStat(); $this->assertGreaterThan(0, $stat['bytes']); @@ -75,23 +76,23 @@ public function testStorage() // Searching $fulltextResult = $storage->fulltextResultByWords(['word1']); $this->assertEquals([ - '1:id_1' => ['pos' => [1], 'extId' => new ExternalId('id_1', 1), 'wordCount' => 0] + '1:id_1' => new FulltextIndexPositionBag(new ExternalId('id_1', 1), [], [], [1], 0) ], $fulltextResult->toArray()['word1']); $fulltextResult = $storage->fulltextResultByWords(['word2']); $this->assertEquals([ - '1:id_1' => ['pos' => ['2'], 'extId' => new ExternalId('id_1', 1), 'wordCount' => 0], - '2:id_2' => ['pos' => [1, 10], 'extId' => new ExternalId('id_2', 2), 'wordCount' => 0], + '1:id_1' => new FulltextIndexPositionBag(new ExternalId('id_1', 1), [], [], [2], 0), + '2:id_2' => new FulltextIndexPositionBag(new ExternalId('id_2', 2), [], [], [1, 10], 0), ], $fulltextResult->toArray()['word2']); $fulltextResult = $storage->fulltextResultByWords(['word2'], 1); $this->assertEquals([ - '1:id_1' => ['pos' => ['2'], 'extId' => new ExternalId('id_1', 1), 'wordCount' => 0], + '1:id_1' => new FulltextIndexPositionBag(new ExternalId('id_1', 1), [], [], ['2'], 0), ], $fulltextResult->toArray()['word2']); $fulltextResult = $storage->fulltextResultByWords(['word2'], 2); $this->assertEquals([ - '2:id_2' => ['pos' => [1, 10], 'extId' => new ExternalId('id_2', 2), 'wordCount' => 0], + '2:id_2' => new FulltextIndexPositionBag(new ExternalId('id_2', 2), [], [], [1, 10], 0), ], $fulltextResult->toArray()['word2']); $entry = $storage->getTocByExternalId($externalId2); @@ -116,7 +117,7 @@ public function testStorage() $fulltextResult = $storage->fulltextResultByWords(['word2']); $this->assertEquals([ - '1:id_1' => ['pos' => ['2'], 'extId' => new ExternalId('id_1', 1), 'wordCount' => 0], + '1:id_1' => new FulltextIndexPositionBag(new ExternalId('id_1', 1), [], [], ['2'], 0), ], $fulltextResult->toArray()['word2']); // Reinit and... @@ -127,7 +128,7 @@ public function testStorage() $fulltextResult = $storage->fulltextResultByWords(['word2']); $this->assertEquals([ - '1:id_1' => ['pos' => ['2'], 'extId' => new ExternalId('id_1', 1), 'wordCount' => 0], + '1:id_1' => new FulltextIndexPositionBag(new ExternalId('id_1', 1), [], [], ['2'], 0), ], $fulltextResult->toArray()['word2']); // Remove id_1 @@ -197,30 +198,31 @@ public function testAddToSingleKeywordIndex() $tocEntry3 = new TocEntry('test title', 'descr', new \DateTime('2014-05-28'), '', 1, '123456789'); $storage->addEntryToToc($tocEntry3, new ExternalId('id_3')); - $storage->addToSingleKeywordIndex('type1', new ExternalId('id_1'), 1); - $storage->addToSingleKeywordIndex('type2', new ExternalId('id_1'), 2); - $storage->addToSingleKeywordIndex('type1', new ExternalId('id_2'), 1); - $storage->addToSingleKeywordIndex('type1', new ExternalId('id_3'), 1); - $storage->addToSingleKeywordIndex('type1-1', new ExternalId('id_1'), 1); + $storage->addToFulltextIndex(['type1'], [], [], new ExternalId('id_1')); + $storage->addToFulltextIndex([], ['type2'], [], new ExternalId('id_1')); + $storage->addToFulltextIndex(['type1'], [], [], new ExternalId('id_2')); + $storage->addToFulltextIndex(['type1'], [], [], new ExternalId('id_3')); + $storage->addToFulltextIndex(['type1-1'], [], [], new ExternalId('id_1')); - $data = $storage->getSingleKeywordIndexByWords(['type1', 'type2']); - $this->assertCount(2, $data); + $data = $storage->fulltextResultByWords(['type1', 'type2']); + $this->assertCount(2, $data->toArray()); $result = []; - $data['type1']->iterate(static function (ExternalId $externalId, $type) use (&$result) { - $result[] = [$externalId, $type]; - }); + foreach ($data->toArray()['type1'] as $item) { + $result[] = [$item->getExternalId(), $item->getTitlePositions(), $item->getKeywordPositions()]; + } + $this->assertCount(3, $result); $this->assertEquals('id_1', $result[0][0]->getId()); - $this->assertEquals(1, $result[0][1]); + $this->assertCount(1, $result[0][1]); $result = []; - $data['type2']->iterate(static function (ExternalId $externalId, $type) use (&$result) { - $result[] = [$externalId, $type]; - }); + foreach ($data->toArray()['type2'] as $item) { + $result[] = [$item->getExternalId(), $item->getTitlePositions(), $item->getKeywordPositions()]; + } $this->assertCount(1, $result); $this->assertEquals('id_1', $result[0][0]->getId()); - $this->assertEquals(2, $result[0][1]); + $this->assertCount(1, $result[0][2]); } public function testDiacritic() @@ -232,8 +234,8 @@ public function testDiacritic() new TocEntry('title 1', 'descr 1', new \DateTime('2014-05-28'), '', 1, '123456789'), new ExternalId('id_1') ); - $storage->addToFulltextIndex(['Flugel', 'Shlomo', 'Tormented'], new ExternalId('id_1')); - $storage->addToFulltextIndex(['Flügel', 'Shlømo', 'Tørmented'], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['Flugel', 'Shlomo', 'Tormented'], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['Flügel', 'Shlømo', 'Tørmented'], new ExternalId('id_1')); } public function testLongWords() @@ -251,29 +253,29 @@ public function testLongWords() new ExternalId('id_2') ); - $storage->addToFulltextIndex(['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); - $storage->addToFulltextIndex(['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); + $storage->addToFulltextIndex([], [], ['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); - $storage->addToFulltextIndex(['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); - $storage->addToFulltextIndex(['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); + $storage->addToFulltextIndex([], [], ['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); - $storage->addToFulltextIndex(['word21', + $storage->addToFulltextIndex([], [], ['word21', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt0', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt1', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt1', ], new ExternalId('id_1')); - $storage->addToFulltextIndex(['word21', + $storage->addToFulltextIndex([], [], ['word21', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt0', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt0', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt1', ], new ExternalId('id_2')); - $storage->addToFulltextIndex(['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_1')); - $storage->addToFulltextIndex(['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_2')); + $storage->addToFulltextIndex([], [], ['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_2')); - $storage->addToFulltextIndex(['word4', '1' . str_repeat('я', 255)], new ExternalId('id_1')); - $storage->addToFulltextIndex(['word4', '1' . str_repeat('я', 255)], new ExternalId('id_2')); + $storage->addToFulltextIndex([], [], ['word4', '1' . str_repeat('я', 255)], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['word4', '1' . str_repeat('я', 255)], new ExternalId('id_2')); } public function testParallelAddingInTransactions(): void @@ -302,7 +304,7 @@ public function testParallelAddingInTransactions(): void new TocEntry('title 1', 'descr 1', new \DateTime('2014-05-28'), '', 1, '123456789'), new ExternalId('id_1') ); - $storage->addToFulltextIndex(['word1', 'word2', 'word3'], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['word1', 'word2', 'word3'], new ExternalId('id_1')); $storage2 = new PdoStorage($pdo2, 'test_tr_'); $storage2->startTransaction(); @@ -319,10 +321,10 @@ public function testParallelAddingInTransactions(): void if ($driverName !== 'sqlite') { $this->expectException(RuntimeException::class); $this->expectExceptionMessage('Cannot insert words. Possible deadlock?'); - $storage2->addToFulltextIndex(['word1', 'word5'], new ExternalId('id_2')); + $storage2->addToFulltextIndex([], [], ['word1', 'word5'], new ExternalId('id_2')); // $storage2->commitTransaction(); // -// $storage->addToFulltext(['word4', 'word5', 'word6'], new ExternalId('id_1')); +// $storage->addToFulltext([], [], ['word4', 'word5', 'word6'], new ExternalId('id_1')); // $storage->commitTransaction(); } } @@ -399,21 +401,21 @@ public function testNonExistentDbAddToFulltext() { $this->expectException(UnknownIdException::class); $storage = new PdoStorage($this->pdo, 'non_existent_'); - $storage->addToFulltextIndex(['word'], new ExternalId('id_1')); + $storage->addToFulltextIndex([], [], ['word'], new ExternalId('id_1')); } public function testNonExistentDbAddToSingleKeywordIndex() { $this->expectException(UnknownIdException::class); $storage = new PdoStorage($this->pdo, 'non_existent_'); - $storage->addToSingleKeywordIndex('keyword', new ExternalId('id_1'), 1); + $storage->addToFulltextIndex([], ['keyword'], [], new ExternalId('id_1'), 1); } public function testNonExistentDbAddToMultipleKeywordIndex() { $this->expectException(UnknownIdException::class); $storage = new PdoStorage($this->pdo, 'non_existent_'); - $storage->addToMultipleKeywordIndex('multi keyword', new ExternalId('id_1'), 1); + $storage->addToFulltextIndex([], ['multi keyword'], [], new ExternalId('id_1'), 1); } public function testNonExistentDbGetTocByExternalIds() @@ -437,18 +439,11 @@ public function testNonExistentDbFillFulltextResultForWords() $storage->fulltextResultByWords(['word']); } - public function testNonExistentDbGetSingleKeywordIndexByString() - { - $this->expectException(EmptyIndexException::class); - $storage = new PdoStorage($this->pdo, 'non_existent_'); - $storage->getSingleKeywordIndexByWords(['keyword']); - } - - public function testNonExistentDbGetMultipleKeywordIndexByString() + public function testNonExistentDbGetSimilar() { $this->expectException(EmptyIndexException::class); $storage = new PdoStorage($this->pdo, 'non_existent_'); - $storage->getMultipleKeywordIndexByString('multi keyword'); + $storage->getSimilar(new ExternalId('id_1'), true); } public function testNonExistentDbRemoveFromToc() diff --git a/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php b/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php index 0a75851..7985084 100644 --- a/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php +++ b/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php @@ -14,6 +14,7 @@ /** * @group storage + * @group arr-storage */ class SingleFileArrayStorageTest extends Unit { @@ -50,17 +51,21 @@ public function testStorage() $this->assertEquals(1, $entry1->getInternalId()); $this->assertEquals(2, $entry2->getInternalId()); - $storage->addToFulltextIndex([1 => 'hello', 2 => 'world'], new ExternalId('test_id_1')); + $storage->addToFulltextIndex(['titleword'], ['keyword1', 'keyword2'], [1 => 'hello', 2 => 'world', 3=>'world'], new ExternalId('test_id_1')); - $fulltextResult = $storage->fulltextResultByWords(['hello']); + $fulltextResult = $storage->fulltextResultByWords(['hello'], null); $info = $fulltextResult->toArray()['hello']; $this->assertArrayHasKey(':test_id_1', $info); - $this->assertEquals([1], $info[':test_id_1']['pos']); + $this->assertEquals([1], $info[':test_id_1']->getContentPositions()); + $this->assertEquals([], $info[':test_id_1']->getTitlePositions()); + $this->assertEquals([], $info[':test_id_1']->getKeywordPositions()); - $fulltextResult = $storage->fulltextResultByWords(['world']); + $fulltextResult = $storage->fulltextResultByWords(['world'], null); $info = $fulltextResult->toArray()['world']; $this->assertArrayHasKey(':test_id_1', $info); - $this->assertEquals([2], $info[':test_id_1']['pos']); + $this->assertEquals([2, 3], $info[':test_id_1']->getContentPositions()); + $this->assertEquals([], $info[':test_id_1']->getTitlePositions()); + $this->assertEquals([], $info[':test_id_1']->getKeywordPositions()); $storage->save(); @@ -74,14 +79,14 @@ public function testStorage() $entry3 = $storage->getTocByExternalId(new ExternalId('test_id_3')); $this->assertNull($entry3); - $storage->addToFulltextIndex([10 => 'hello', 20 => 'world'], new ExternalId('test_id_2')); + $storage->addToFulltextIndex([], [], [10 => 'hello', 20 => 'world'], new ExternalId('test_id_2')); - $fulltextResult = $storage->fulltextResultByWords(['world']); + $fulltextResult = $storage->fulltextResultByWords(['world'], null); $info = $fulltextResult->toArray()['world']; $this->assertArrayHasKey(':test_id_1', $info); - $this->assertEquals([2], $info[':test_id_1']['pos']); + $this->assertEquals([2, 3], $info[':test_id_1']->getContentPositions()); $this->assertArrayHasKey(':test_id_2', $info); - $this->assertEquals([20], $info[':test_id_2']['pos']); + $this->assertEquals([20], $info[':test_id_2']->getContentPositions()); $storage->save(); }