From b4a1da80a1b9ec74b0b5712d2e11936b67e0389f Mon Sep 17 00:00:00 2001 From: Roman Parpalak Date: Tue, 14 Nov 2023 12:32:48 +0200 Subject: [PATCH] WIP --- src/S2/Rose/Entity/FulltextResult.php | 89 ++++------ .../Exception/UnknownKeywordTypeException.php | 12 -- src/S2/Rose/Finder.php | 64 +------ src/S2/Rose/Indexer.php | 94 +++++----- src/S2/Rose/Storage/ArrayFulltextStorage.php | 39 ++--- src/S2/Rose/Storage/ArrayStorage.php | 102 ++--------- .../Storage/Database/AbstractRepository.php | 165 ++++-------------- .../Rose/Storage/Database/MysqlRepository.php | 18 -- src/S2/Rose/Storage/Database/PdoStorage.php | 130 +++----------- .../Storage/Database/PostgresRepository.php | 30 ---- .../Storage/Database/SqliteRepository.php | 30 ---- src/S2/Rose/Storage/FulltextIndexContent.php | 12 +- .../Rose/Storage/FulltextProxyInterface.php | 37 +--- src/S2/Rose/Storage/KeywordIndexContent.php | 40 ----- src/S2/Rose/Storage/StorageReadInterface.php | 39 +---- src/S2/Rose/Storage/StorageWriteInterface.php | 50 +----- tests/unit/Rose/FinderTest.php | 72 +++----- tests/unit/Rose/IntegrationTest.php | 2 +- tests/unit/Rose/Storage/PdoStorageTest.php | 36 ++-- .../Storage/SingleFileArrayStorageTest.php | 4 +- 20 files changed, 243 insertions(+), 822 deletions(-) delete mode 100644 src/S2/Rose/Exception/UnknownKeywordTypeException.php delete mode 100644 src/S2/Rose/Storage/KeywordIndexContent.php diff --git a/src/S2/Rose/Entity/FulltextResult.php b/src/S2/Rose/Entity/FulltextResult.php index 63f4ce0..129f3ae 100644 --- a/src/S2/Rose/Entity/FulltextResult.php +++ b/src/S2/Rose/Entity/FulltextResult.php @@ -11,27 +11,11 @@ class FulltextResult { - /** - * @var int - */ - protected $tocSize = 0; - - /** - * @var FulltextQuery - */ - protected $query; - - /** - * @var FulltextIndexContent - */ - protected $fulltextIndexContent; + protected int $tocSize = 0; + protected FulltextQuery $query; + protected FulltextIndexContent $fulltextIndexContent; - /** - * @param FulltextQuery $query - * @param FulltextIndexContent $fulltextIndexContent - * @param int $tocSize - */ - public function __construct(FulltextQuery $query, FulltextIndexContent $fulltextIndexContent, $tocSize = 0) + public function __construct(FulltextQuery $query, FulltextIndexContent $fulltextIndexContent, int $tocSize = 0) { $this->query = $query; $this->fulltextIndexContent = $fulltextIndexContent; @@ -40,13 +24,8 @@ public function __construct(FulltextQuery $query, FulltextIndexContent $fulltext /** * https://i.upmath.me/svg/%5Cbegin%7Btikzpicture%7D%5Bscale%3D1.0544%5D%5Csmall%0A%5Cbegin%7Baxis%7D%5Baxis%20line%20style%3Dgray%2C%0A%09samples%3D100%2C%0A%09xmin%3D-1.2%2C%20xmax%3D1.2%2C%0A%09ymin%3D0%2C%20ymax%3D1.1%2C%0A%09restrict%20y%20to%20domain%3D-0.1%3A1%2C%0A%09ytick%3D%7B1%7D%2C%0A%09xtick%3D%7B-1%2C1%7D%2C%0A%09axis%20equal%2C%0A%09axis%20x%20line%3Dcenter%2C%0A%09axis%20y%20line%3Dcenter%2C%0A%09xlabel%3D%24x%24%2Cylabel%3D%24y%24%5D%0A%5Caddplot%5Bred%2Cdomain%3D-2%3A1%2Csemithick%5D%7Bexp(-(x%2F0.38)%5E2)%7D%3B%0A%5Caddplot%5Bred%5D%20coordinates%20%7B(0.8%2C0.6)%7D%20node%7B%24y%3De%5E%7B-%5Cleft(x%2F0.38%5Cright)%5E2%7D%24%7D%3B%0A%5Cpath%20(axis%20cs%3A0%2C0)%20node%20%5Banchor%3Dnorth%20west%2Cyshift%3D-0.07cm%5D%20%7B0%7D%3B%0A%5Cend%7Baxis%7D%0A%5Cend%7Btikzpicture%7D - * - * @param int $tocSize - * @param int $foundTocEntriesNum - * - * @return float */ - public static function frequencyReduction($tocSize, $foundTocEntriesNum) + public static function frequencyReduction(int $tocSize, int $foundTocEntriesNum): float { if ($tocSize < 5) { return 1; @@ -57,12 +36,8 @@ public static function frequencyReduction($tocSize, $foundTocEntriesNum) /** * Weight ratio for repeating words in the indexed item. - * - * @param int $repeatNum - * - * @return float */ - protected static function repeatWeightRatio($repeatNum) + protected static function repeatWeightRatio(int $repeatNum): float { return min(0.5 * ($repeatNum - 1) + 1, 4); } @@ -71,12 +46,8 @@ protected static function repeatWeightRatio($repeatNum) * Weight ratio for entry size (prefer some middle size) * * https://i.upmath.me/g/%5Cbegin%7Btikzpicture%7D%5Bscale%3D1.0544%5D%5Csmall%0A%5Cbegin%7Baxis%7D%5Baxis%20line%20style%3Dgray%2C%0A%09samples%3D100%2C%0A%09ymin%3D0%2C%20ymax%3D5%2C%0A%09xmin%3D0%2C%20xmax%3D1100%2C%0A%09ytick%3D%7B1%2C2%7D%2C%0A%09xtick%3D%7B50%2C200%2C500%2C1000%7D%2C%0A%09axis%20x%20line%3Dcenter%2C%0A%09axis%20y%20line%3Dcenter%2C%0A%09xlabel%3D%24x%24%2Cylabel%3D%24y%24%5D%0A%5Caddplot%5Bred%2Cdomain%3D0%3A1000%2Csemithick%5D%7B1%2F(1%2Bexp((sqrt(x)-18)%5E2%2F60))%2B1%7D%3B%0A%5Caddplot%5Bblue%2Cdomain%3D0%3A1000%2Csemithick%5D%7B1%7D%3B%0A%5Caddplot%5Bred%5D%20coordinates%20%7B(600%2C3)%7D%20node%7B%24y%3D1%2F(1%2Bexp((sqrt(x)-18)%5E2%2F60))%2B1%24%7D%3B%0A%5Cend%7Baxis%7D%0A%5Cend%7Btikzpicture%7D - * - * @param int $totalWordsNum - * - * @return float */ - protected static function entrySizeWeightRatio($totalWordsNum) + protected static function entrySizeWeightRatio(int $totalWordsNum): float { return $totalWordsNum >= 10 ? 1.0 + 1.0 / (1.0 + exp((sqrt($totalWordsNum) - 18) ** 2 / 60.0)) : 1; } @@ -89,42 +60,54 @@ protected static function entrySizeWeightRatio($totalWordsNum) * * @return float */ - protected static function neighbourWeight($distance) + protected static function neighbourWeight(float $distance): float { return 30.0 / (1 + pow($distance / 7.0, 2)); } /** - * @param ResultSet $resultSet - * * @throws ImmutableException */ - public function fillResultSet(ResultSet $resultSet) + public function fillResultSet(ResultSet $resultSet): void { - // $queryWordCount = $this->query->getCount(); - $wordReductionRatios = []; - foreach ($this->fulltextIndexContent->toArray() as $word => $items) { - $reductionRatio = self::frequencyReduction($this->tocSize, count($items)); + foreach ($this->fulltextIndexContent->toArray() as $word => $indexedItems) { + $reductionRatio = self::frequencyReduction($this->tocSize, \count($indexedItems)); $wordReductionRatios[$word] = $reductionRatio; - foreach ($items as $positions) { - $weights = [ - 'abundance_reduction' => $reductionRatio, - 'repeat_multiply' => self::repeatWeightRatio(count($positions['pos'])), - 'entry_size' => self::entrySizeWeightRatio($positions['wordCount']), - ]; - $resultSet->addWordWeight($word, $positions['extId'], $weights, $positions['pos']); + foreach ($indexedItems as $positions) { + $externalId = $positions['extId']; + if (\count($positions['pos']) > 0) { + $weights = [ + 'abundance_reduction' => $reductionRatio, + 'repeat_multiply' => self::repeatWeightRatio(\count($positions['pos'])), + 'entry_size' => self::entrySizeWeightRatio($positions['wordCount']), + ]; + $resultSet->addWordWeight($word, $externalId, $weights, $positions['pos']); + } + if (\count($positions['kpos']) > 0) { + $resultSet->addWordWeight($word, $externalId, [ + 'keyword' => 15, + 'abundance_reduction' => $reductionRatio, + ]); + } + if (\count($positions['tpos']) > 0) { + $resultSet->addWordWeight($word, $externalId, [ + 'title' => 25, + // TODO seems like this was not used before + // 'abundance_reduction' => $reductionRatio, + ]); + } } } $referenceContainer = $this->query->toWordPositionContainer(); - $this->fulltextIndexContent->iterateWordPositions( + $this->fulltextIndexContent->iterateContentWordPositions( static function (ExternalId $id, WordPositionContainer $container) use ($referenceContainer, $wordReductionRatios, $resultSet) { $pairsDistance = $container->compareWith($referenceContainer); foreach ($pairsDistance as $pairDistance) { - list($word1, $word2, $distance) = $pairDistance; + [$word1, $word2, $distance] = $pairDistance; $weight = self::neighbourWeight($distance); if (isset($wordReductionRatios[$word1])) { $weight *= $wordReductionRatios[$word1]; diff --git a/src/S2/Rose/Exception/UnknownKeywordTypeException.php b/src/S2/Rose/Exception/UnknownKeywordTypeException.php deleted file mode 100644 index 017b69e..0000000 --- a/src/S2/Rose/Exception/UnknownKeywordTypeException.php +++ /dev/null @@ -1,12 +0,0 @@ -setHighlightTemplate($this->highlightTemplate); } - $rawWords = $query->valueToArray(); - $cleanedQuery = implode(' ', $rawWords); + $rawWords = $query->valueToArray(); $resultSet->addProfilePoint('Input cleanup'); - if (\count($rawWords) > 1) { - $this->findSpacedKeywords($cleanedQuery, $query->getInstanceId(), $resultSet); - $resultSet->addProfilePoint('Keywords with space'); - } - if (\count($rawWords) > 0) { - $this->findSimpleKeywords($rawWords, $query->getInstanceId(), $resultSet); - $resultSet->addProfilePoint('Simple keywords'); - $this->findFulltext($rawWords, $query->getInstanceId(), $resultSet); $resultSet->addProfilePoint('Fulltext search'); } @@ -108,23 +95,6 @@ public static function fulltextRateExcludeNum(int $tocSize): int return max($tocSize * 0.5, 20); } - /** - * @return int[]|array - * @throws UnknownKeywordTypeException - */ - protected static function getKeywordWeight(int $type): array - { - if ($type === self::TYPE_KEYWORD) { - return ['keyword' => 15]; - } - - if ($type === self::TYPE_TITLE) { - return ['title' => 25]; - } - - throw new UnknownKeywordTypeException(sprintf('Unknown type "%s"', $type)); - } - /** * @throws ImmutableException */ @@ -141,36 +111,6 @@ protected function findFulltext(array $words, ?int $instanceId, ResultSet $resul $fulltextResult->fillResultSet($resultSet); } - /** - * @param string[] $words - */ - protected function findSimpleKeywords(array $words, ?int $instanceId, ResultSet $result): void - { - $wordsWithStems = $words; - foreach ($words as $word) { - $stem = $this->stemmer->stemWord($word); - $wordsWithStems[] = $stem; - } - - foreach ($this->storage->getSingleKeywordIndexByWords($wordsWithStems, $instanceId) as $word => $content) { - $content->iterate(static function (ExternalId $externalId, $type, $tocSize, $foundTocEntriesNum) use ($word, $result) { - $weights = self::getKeywordWeight($type); - if ($tocSize !== null && $foundTocEntriesNum !== null) { - $weights['abundance_reduction'] = FulltextResult::frequencyReduction($tocSize, $foundTocEntriesNum); - } - $result->addWordWeight($word, $externalId, $weights); - }); - } - } - - protected function findSpacedKeywords(string $string, ?int $instanceId, ResultSet $result): void - { - $content = $this->storage->getMultipleKeywordIndexByString($string, $instanceId); - $content->iterate(static function (ExternalId $externalId, $type) use ($string, $result) { - $result->addWordWeight($string, $externalId, self::getKeywordWeight($type)); - }); - } - public function buildSnippets(array $relevanceByExternalIds, ResultSet $resultSet): void { $snippetQuery = new SnippetQuery(ExternalIdCollection::fromStringArray(array_keys($relevanceByExternalIds))); diff --git a/src/S2/Rose/Indexer.php b/src/S2/Rose/Indexer.php index b0ebcf7..9e6ff32 100644 --- a/src/S2/Rose/Indexer.php +++ b/src/S2/Rose/Indexer.php @@ -76,65 +76,26 @@ protected static function arrayFromStr(string $contents): array return $words; } - protected function addKeywordToIndex(string $word, ExternalId $externalId, int $type): void - { - if ($word === '') { - return; - } - - $word = str_replace('ё', 'е', $word); - - if (strpos($word, ' ') !== false) { - $this->storage->addToMultipleKeywordIndex($word, $externalId, $type); - } else { - $this->storage->addToSingleKeywordIndex($word, $externalId, $type); - } - } - protected function addToIndex(ExternalId $externalId, string $title, ContentWithMetadata $content, string $keywords): void { - // Processing title - foreach (self::arrayFromStr($title) as $titleWord) { - $this->addKeywordToIndex($this->stemmer->stemWord(trim($titleWord)), $externalId, Finder::TYPE_TITLE); - } - - // Processing keywords - foreach (explode(',', $keywords) as $item) { - $this->addKeywordToIndex($this->stemmer->stemWord(trim($item)), $externalId, Finder::TYPE_KEYWORD); - } - - // Fulltext index - $sentenceCollection = $content->getSentenceMap()->toSentenceCollection(); $words = $sentenceCollection->getWordsArray(); $words = array_merge($words, self::arrayFromStr(str_replace(', ', ' ', $keywords))); - $subWords = []; - - foreach ($words as $i => &$word) { - if ($this->storage->isExcluded($word)) { + foreach ($words as $i => $word) { + if ($this->storage->isExcludedWord($word)) { unset($words[$i]); - continue; - } - - $stemmedWord = $this->stemmer->stemWord($word, false); - - // If the word contains punctuation marks like hyphen, add a variant without it - if (false !== strpbrk($stemmedWord, '-.,')) { - foreach (preg_split('#[\-.,]#', $word) as $k => $subWord) { - if ($subWord) { - $subWords[(string)($i + 0.001 * ($k + 1))] = $this->stemmer->stemWord($subWord, false); - } - } } - - $word = $stemmedWord; } - unset($word); $this->storage->addMetadata($externalId, \count($words), $content->getImageCollection()); $this->storage->addSnippets($externalId, ...$sentenceCollection->getSnippetSources()); - $this->storage->addToFulltext(array_merge($words, $subWords), $externalId); + $this->storage->addToFulltextIndex( + $this->getStemsWithComponents(self::arrayFromStr($title)), + $this->getStemsWithComponents(self::arrayFromStr($keywords)), // TODO consider different semantics of space and comma? + $this->getStemsWithComponents($words), + $externalId + ); } public function removeById(string $id, ?int $instanceId): void @@ -183,7 +144,7 @@ protected function doIndex(Indexable $indexable): void $this->storage->addEntryToToc($indexable->toTocEntry(), $externalId); - if (!$oldTocEntry || $oldTocEntry->getHash() !== $indexable->calcHash()) { + if ($oldTocEntry === null || $oldTocEntry->getHash() !== $indexable->calcHash()) { $this->storage->removeFromIndex($externalId); $extractionResult = $this->extractor->extract($indexable->getContent()); @@ -219,4 +180,41 @@ protected function doIndex(Indexable $indexable): void throw $e; } } + + /** + * Replaces words with stems. Also, this method detects compound words and adds the component stems to the result. + * + * The keys in the result arrays are the positions of the word. For compound words a string representation + * of a float is used to map one index to several words. For example, for input + * + * [10 => 'well-known', 11 => 'facts'] + * + * this method returns + * + * [10 => 'well-known', '10.001' => 'well', '10.002' => 'known', 11 => 'fact'] + * + * @param array $words + * @return array + */ + private function getStemsWithComponents(array $words): array + { + $componentsOfCompoundWords = []; + foreach ($words as $i => &$word) { + $stemmedWord = $this->stemmer->stemWord($word, false); + + // If the word contains punctuation marks like hyphen, add a variant without it + if (false !== strpbrk($stemmedWord, '-.,')) { + foreach (preg_split('#[\-.,]#', $word) as $k => $subWord) { + if ($subWord) { + $componentsOfCompoundWords[(string)($i + 0.001 * ($k + 1))] = $this->stemmer->stemWord($subWord, false); + } + } + } + + $word = $stemmedWord; + } + unset($word); + + return array_merge($words, $componentsOfCompoundWords); + } } diff --git a/src/S2/Rose/Storage/ArrayFulltextStorage.php b/src/S2/Rose/Storage/ArrayFulltextStorage.php index d62269f..34d233e 100644 --- a/src/S2/Rose/Storage/ArrayFulltextStorage.php +++ b/src/S2/Rose/Storage/ArrayFulltextStorage.php @@ -1,35 +1,24 @@ fulltextIndex; } - /** - * @param array $fulltextIndex - * - * @return ArrayFulltextStorage - */ - public function setFulltextIndex(array $fulltextIndex = null) + public function setFulltextIndex(array $fulltextIndex): self { $this->fulltextIndex = $fulltextIndex; @@ -39,7 +28,7 @@ public function setFulltextIndex(array $fulltextIndex = null) /** * {@inheritdoc} */ - public function getByWord($word) + public function getByWord(string $word): array { if (!isset($this->fulltextIndex[$word])) { return []; @@ -47,7 +36,7 @@ public function getByWord($word) $result = []; foreach ($this->fulltextIndex[$word] as $id => $entries) { - if (is_int($entries)) { + if (\is_int($entries)) { $result[$id][] = $entries; } else { $entries = explode('|', $entries); @@ -63,19 +52,19 @@ public function getByWord($word) /** * {@inheritdoc} */ - public function countByWord($word) + public function countByWord(string $word): int { if (!isset($this->fulltextIndex[$word])) { return 0; } - return count($this->fulltextIndex[$word]); + return \count($this->fulltextIndex[$word]); } /** * {@inheritdoc} */ - public function addWord($word, $id, $position) + public function addWord(string $word, int $id, int $position): void { $word = (string)$word; if ($word === '') { @@ -84,7 +73,7 @@ public function addWord($word, $id, $position) if (isset($this->fulltextIndex[$word][$id])) { $value = $this->fulltextIndex[$word][$id]; - if (is_int($value)) { + if (\is_int($value)) { // There was the only one position, but it's no longer the case. // Convert to the 36-based number system. $this->fulltextIndex[$word][$id] = base_convert($value, 10, 36) . '|' . base_convert($position, 10, 36); @@ -101,7 +90,7 @@ public function addWord($word, $id, $position) /** * {@inheritdoc} */ - public function removeWord($word) + public function removeWord(string $word): void { unset($this->fulltextIndex[$word]); } @@ -109,13 +98,13 @@ public function removeWord($word) /** * {@inheritdoc} */ - public function getFrequentWords($threshold) + public function getFrequentWords(int $threshold): array { $result = []; $link = &$this->fulltextIndex; // for memory optimization foreach ($this->fulltextIndex as $word => $stat) { // Drop fulltext frequent or empty items - $num = count($stat); + $num = \count($stat); if ($num > $threshold) { $result[$word] = $num; } @@ -127,7 +116,7 @@ public function getFrequentWords($threshold) /** * {@inheritdoc} */ - public function removeById($id) + public function removeById(int $id): void { foreach ($this->fulltextIndex as &$data) { if (isset($data[$id])) { diff --git a/src/S2/Rose/Storage/ArrayStorage.php b/src/S2/Rose/Storage/ArrayStorage.php index 29a9b38..b6ed4b8 100644 --- a/src/S2/Rose/Storage/ArrayStorage.php +++ b/src/S2/Rose/Storage/ArrayStorage.php @@ -63,7 +63,7 @@ abstract class ArrayStorage implements StorageReadInterface, StorageWriteInterfa /** * {@inheritdoc} */ - public function fulltextResultByWords(array $words, $instanceId = null) + public function fulltextResultByWords(array $words, ?int $instanceId): FulltextIndexContent { $result = new FulltextIndexContent(); foreach ($words as $word) { @@ -82,49 +82,6 @@ public function fulltextResultByWords(array $words, $instanceId = null) return $result; } - /** - * {@inheritdoc} - */ - public function getSingleKeywordIndexByWords(array $words, $instanceId = null) - { - $result = []; - foreach ($words as $word) { - $result[$word] = new KeywordIndexContent(); - if (isset($this->indexSingleKeywords[$word])) { - foreach ($this->indexSingleKeywords[$word] as $id => $type) { - $externalId = $this->externalIdFromInternalId($id); - if ($instanceId === null || $externalId->getInstanceId() === $instanceId) { - $result[$word]->add($externalId, $type, $this->getTocSize($instanceId), $this->fulltextProxy->countByWord($word)); - } - } - } - } - - return $result; - } - - /** - * {@inheritdoc} - */ - public function getMultipleKeywordIndexByString($string, $instanceId = null) - { - $string = ' ' . $string . ' '; - - $result = new KeywordIndexContent(); - foreach ($this->indexMultiKeywords as $keyword => $typesById) { - if (strpos($string, ' ' . $keyword . ' ') !== false) { - foreach ($typesById as $id => $type) { - $externalId = $this->externalIdFromInternalId($id); - if ($instanceId === null || $externalId->getInstanceId() === $instanceId) { - $result->add($externalId, $type); - } - } - } - } - - return $result; - } - /** * {@inheritdoc} */ @@ -151,10 +108,11 @@ public function getSnippets(SnippetQuery $snippetQuery): SnippetResult * {@inheritdoc} * @throws UnknownIdException */ - public function addToFulltext(array $words, ExternalId $externalId) + public function addToFulltextIndex(array $contentWords, array $titleWords, array $keywords, ExternalId $externalId): void { + // TODO $id = $this->internalIdFromExternalId($externalId); - foreach ($words as $position => $word) { + foreach ($contentWords as $position => $word) { $this->fulltextProxy->addWord($word, $id, (int)$position); } } @@ -162,7 +120,7 @@ public function addToFulltext(array $words, ExternalId $externalId) /** * {@inheritdoc} */ - public function isExcluded($word) + public function isExcludedWord(string $word): bool { return isset($this->excludedWords[$word]); } @@ -170,9 +128,9 @@ public function isExcluded($word) /** * Drops frequent words from index. */ - public function cleanup() + public function cleanup(): void { - $threshold = Finder::fulltextRateExcludeNum(count($this->toc)); + $threshold = Finder::fulltextRateExcludeNum(\count($this->toc)); foreach ($this->fulltextProxy->getFrequentWords($threshold) as $word => $stat) { // Drop fulltext frequent or empty items @@ -185,25 +143,7 @@ public function cleanup() * {@inheritdoc} * @throws UnknownIdException */ - public function addToSingleKeywordIndex($word, ExternalId $externalId, $type) - { - $this->indexSingleKeywords[$word][$this->internalIdFromExternalId($externalId)] = $type; - } - - /** - * {@inheritdoc} - * @throws UnknownIdException - */ - public function addToMultipleKeywordIndex($string, ExternalId $externalId, $type) - { - $this->indexMultiKeywords[$string][$this->internalIdFromExternalId($externalId)] = $type; - } - - /** - * {@inheritdoc} - * @throws UnknownIdException - */ - public function removeFromIndex(ExternalId $externalId) + public function removeFromIndex(ExternalId $externalId): void { $internalId = $this->internalIdFromExternalId($externalId); @@ -241,7 +181,7 @@ public function removeFromIndex(ExternalId $externalId) /** * {@inheritdoc} */ - public function addEntryToToc(TocEntry $entry, ExternalId $externalId) + public function addEntryToToc(TocEntry $entry, ExternalId $externalId): void { try { $internalId = $this->internalIdFromExternalId($externalId); @@ -285,7 +225,7 @@ public function addSnippets(ExternalId $externalId, SnippetSource ...$snippets): /** * {@inheritdoc} */ - public function getTocByExternalIds(ExternalIdCollection $externalIds, $instanceId = null) + public function getTocByExternalIds(ExternalIdCollection $externalIds): array { $result = []; foreach ($externalIds->toArray() as $externalId) { @@ -305,7 +245,7 @@ public function getTocByExternalIds(ExternalIdCollection $externalIds, $instance /** * {@inheritdoc} */ - public function getTocByExternalId(ExternalId $externalId) + public function getTocByExternalId(ExternalId $externalId): ?TocEntry { $serializedExtId = $externalId->toString(); @@ -315,7 +255,7 @@ public function getTocByExternalId(ExternalId $externalId) /** * {@inheritdoc} */ - public function removeFromToc(ExternalId $externalId) + public function removeFromToc(ExternalId $externalId): void { $serializedExtId = $externalId->toString(); if (!isset($this->toc[$serializedExtId])) { @@ -329,18 +269,15 @@ public function removeFromToc(ExternalId $externalId) /** * {@inheritdoc} */ - public function getTocSize($instanceId) + public function getTocSize(?int $instanceId): int { - return count($this->toc); + return \count($this->toc); } /** - * @param ExternalId $externalId - * - * @return int * @throws UnknownIdException */ - private function internalIdFromExternalId(ExternalId $externalId) + private function internalIdFromExternalId(ExternalId $externalId): int { $serializedExtId = $externalId->toString(); if (!isset($this->toc[$serializedExtId])) { @@ -350,13 +287,8 @@ private function internalIdFromExternalId(ExternalId $externalId) return $this->toc[$serializedExtId]->getInternalId(); } - /** - * @param int $internalId - * - * @return ExternalId - */ - private function externalIdFromInternalId($internalId) + private function externalIdFromInternalId(int $internalId): ?ExternalId { - return isset($this->externalIdMap[$internalId]) ? $this->externalIdMap[$internalId] : null; + return $this->externalIdMap[$internalId] ?? null; } } diff --git a/src/S2/Rose/Storage/Database/AbstractRepository.php b/src/S2/Rose/Storage/Database/AbstractRepository.php index 5c45002..5bab47e 100644 --- a/src/S2/Rose/Storage/Database/AbstractRepository.php +++ b/src/S2/Rose/Storage/Database/AbstractRepository.php @@ -27,19 +27,18 @@ abstract class AbstractRepository public const METADATA = 'metadata'; public const SNIPPET = 'snippet'; public const FULLTEXT_INDEX = 'fulltext_index'; - public const KEYWORD_INDEX = 'keyword_index'; - public const KEYWORD_MULTIPLE_INDEX = 'keyword_multiple_index'; - protected const DEFAULT_TABLE_NAMES = [ + protected const DEFAULT_TABLE_NAMES = [ self::TOC => 'toc', self::WORD => 'word', self::METADATA => 'metadata', self::SNIPPET => 'snippet', self::FULLTEXT_INDEX => 'fulltext_index', - self::KEYWORD_INDEX => 'keyword_index', - self::KEYWORD_MULTIPLE_INDEX => 'keyword_multiple_index', ]; + protected const POSITION_PREFIX_KEYWORD = 'k'; + protected const POSITION_PREFIX_TITLE = 't'; + protected string $prefix; protected array $options; protected \PDO $pdo; @@ -158,20 +157,28 @@ public function findIdsByWords(array $words): array /** * @throws UnknownException */ - public function insertFulltext(array $words, array $wordIds, int $internalId): void + public function insertFulltext(array $titleWords, array $keywords, array $contentWords, array $wordIds, int $internalId): void { $data = []; - foreach ($words as $position => $word) { - $key = $wordIds[$word]; - $data[$key][] = (int)$position; + foreach ( + [ + [$titleWords, self::POSITION_PREFIX_TITLE], + [$keywords, self::POSITION_PREFIX_KEYWORD], + [$contentWords, ''], + ] as [$words, $prefix] + ) { + foreach ($words as $position => $word) { // float $position + $wordId = $wordIds[$word]; + $data[$wordId][] = $prefix . ((string)(int)$position); + } } if (\count($data) === 0) { return; } $sqlParts = ''; - foreach ($data as $wordId => $positions) { - $sqlParts .= ($sqlParts !== '' ? ',' : '') . '(' . $wordId . ',' . $internalId . ',\'' . implode(',', $positions) . '\')'; + foreach ($data as $wordId => $prefixedPositions) { + $sqlParts .= ($sqlParts !== '' ? ',' : '') . '(' . $wordId . ',' . $internalId . ',\'' . implode(',', $prefixedPositions) . '\')'; } $sql = 'INSERT INTO ' . $this->getTableName(self::FULLTEXT_INDEX) @@ -192,7 +199,7 @@ public function insertFulltext(array $words, array $wordIds, int $internalId): v * @throws EmptyIndexException * @throws UnknownException */ - public function findFulltextByWords(array $words, int $instanceId = null): array + public function findFulltextByWords(array $words, int $instanceId = null): \Generator { $sql = ' SELECT w.name AS word, t.external_id, t.instance_id, f.positions, COALESCE(m.word_count, 0) AS word_count @@ -226,28 +233,20 @@ public function findFulltextByWords(array $words, int $instanceId = null): array ), 0, $e); } - $data = $statement->fetchAll(\PDO::FETCH_ASSOC); - - foreach ($data as &$row) { - $row['positions'] = explode(',', $row['positions']); - } - unset($row); - - return $data; - } - - /** - * @param string[] $words - */ - public function insertKeywords(array $words, int $internalId, int $type, string $tableKey): void - { - $data = []; - foreach ($words as $keyword) {// Ready for bulk insert - $data[] = $this->pdo->quote($keyword) . ',' . $internalId . ',' . $type; + // TODO think about DTO + while ($row = $statement->fetch(\PDO::FETCH_ASSOC)) { + $row['title_positions'] = $row['keyword_positions'] = $row['content_positions'] = []; + foreach (explode(',', $row['positions']) as $positionWithPrefix) { + if ($positionWithPrefix[0] === self::POSITION_PREFIX_TITLE) { + $row['title_positions'][] = substr($positionWithPrefix, 1); + } else if ($positionWithPrefix[0] === self::POSITION_PREFIX_KEYWORD) { + $row['keyword_positions'][] = substr($positionWithPrefix, 1); + } else { + $row['content_positions'][] = $positionWithPrefix; + } + } + yield $row; } - - $sql = 'INSERT INTO ' . $this->getTableName($tableKey) . ' (keyword, toc_id, type) VALUES ( ' . implode('),(', $data) . ')'; - $this->pdo->exec($sql); } /** @@ -288,100 +287,6 @@ public function insertSnippets(int $internalId, SnippetSource ...$snippetInfo): } } - /** - * @param string[] $words - * - * @throws EmptyIndexException - * @throws UnknownException - */ - public function findSingleKeywordIndex(array $words, ?int $instanceId): array - { - $usageSql = ' - SELECT COUNT(DISTINCT f.toc_id) - FROM ' . $this->getTableName(self::FULLTEXT_INDEX) . ' AS f - JOIN ' . $this->getTableName(self::WORD) . ' AS w ON w.id = f.word_id - ' . ( - $instanceId !== null - ? 'JOIN ' . $this->getTableName(self::TOC) . ' AS t ON t.id = f.toc_id AND t.instance_id = ' . $instanceId . ' ' - : '' - ) . ' WHERE k.keyword = w.name'; - - $sql = ' - SELECT - k.keyword, - t.external_id, - t.instance_id, - k.type, - (' . $usageSql . ') AS usage_num - FROM ' . $this->getTableName(self::KEYWORD_INDEX) . ' AS k - JOIN ' . $this->getTableName(self::TOC) . ' AS t ON t.id = k.toc_id - WHERE k.keyword IN (' . implode(',', array_fill(0, \count($words), '?')) . ') - '; - - $parameters = $words; - if ($instanceId !== null) { - $sql .= ' AND t.instance_id = ?'; - $parameters[] = $instanceId; - } - - try { - $st = $this->pdo->prepare($sql); - $st->execute($parameters); - } catch (\PDOException $e) { - if ($this->isUnknownTableException($e)) { - throw new EmptyIndexException('There are no storage tables in the database. Call ' . __CLASS__ . '::erase() first.', 0, $e); - } - throw new UnknownException(sprintf( - 'Unknown exception "%s" occurred while single keywords searching: "%s".', - $e->getCode(), - $e->getMessage() - ), 0, $e); - } - - $data = $st->fetchAll(\PDO::FETCH_ASSOC); - - return $data; - } - - /** - * @throws EmptyIndexException - * @throws UnknownException - */ - public function findMultipleKeywordIndex(string $string, ?int $instanceId): array - { - $sql = ' - SELECT t.external_id, t.instance_id, k.type - FROM ' . $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) . ' AS k - JOIN ' . $this->getTableName(self::TOC) . ' AS t ON t.id = k.toc_id - WHERE k.keyword LIKE ? ESCAPE \'=\' - '; - - $parameters = ['% ' . $this->escapeLike($string, '=') . ' %']; - if ($instanceId !== null) { - $sql .= ' AND t.instance_id = ?'; - $parameters[] = $instanceId; - } - - try { - $statement = $this->pdo->prepare($sql); - $statement->execute($parameters); - } catch (\PDOException $e) { - if ($this->isUnknownTableException($e)) { - throw new EmptyIndexException('There are no storage tables in the database. Call ' . __CLASS__ . '::erase() first.', 0, $e); - } - throw new UnknownException(sprintf( - 'Unknown exception "%s" occurred while multiple keywords searching: "%s".', - $e->getCode(), - $e->getMessage() - ), 0, $e); - } - - // TODO \PDO::FETCH_UNIQUE seems to be a hack for caller. Rewrite using INSERT IGNORE? @see \S2\Rose\Storage\KeywordIndexContent::add - $data = $statement->fetchAll(\PDO::FETCH_COLUMN | \PDO::FETCH_GROUP | \PDO::FETCH_UNIQUE); - - return $data; - } - public function getSnippets(SnippetQuery $snippetQuery): array { $internalIds = $this->selectInternalIds(...$snippetQuery->getExternalIds()); @@ -453,12 +358,6 @@ public function removeFromIndex(ExternalId $externalId): void $st = $this->pdo->prepare("DELETE FROM {$this->getTableName(self::FULLTEXT_INDEX)} WHERE toc_id = ?"); $st->execute([$tocId]); - $st = $this->pdo->prepare("DELETE FROM {$this->getTableName(self::KEYWORD_INDEX)} WHERE toc_id = ?"); - $st->execute([$tocId]); - - $st = $this->pdo->prepare("DELETE FROM {$this->getTableName(self::KEYWORD_MULTIPLE_INDEX)} WHERE toc_id = ?"); - $st->execute([$tocId]); - $st = $this->pdo->prepare("DELETE FROM {$this->getTableName(self::METADATA)} WHERE toc_id = ?"); $st->execute([$tocId]); diff --git a/src/S2/Rose/Storage/Database/MysqlRepository.php b/src/S2/Rose/Storage/Database/MysqlRepository.php index a1fed75..842afe4 100644 --- a/src/S2/Rose/Storage/Database/MysqlRepository.php +++ b/src/S2/Rose/Storage/Database/MysqlRepository.php @@ -336,23 +336,5 @@ private function dropAndCreateTables(string $charset, int $keyLen): void PRIMARY KEY (`id`), UNIQUE KEY (name(' . $keyLen . ')) ) ENGINE=InnoDB CHARACTER SET ' . $charset . ' COLLATE ' . $charset . '_bin'); - - $this->pdo->exec('DROP TABLE IF EXISTS ' . $this->getTableName(self::KEYWORD_INDEX) . ';'); - $this->pdo->exec('CREATE TABLE ' . $this->getTableName(self::KEYWORD_INDEX) . ' ( - keyword VARCHAR(255) NOT NULL, - toc_id INT(11) UNSIGNED NOT NULL, - type INT(11) UNSIGNED NOT NULL, - KEY (keyword(' . $keyLen . ')), - KEY (toc_id) - ) ENGINE=InnoDB CHARACTER SET ' . $charset); - - $this->pdo->exec('DROP TABLE IF EXISTS ' . $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) . ';'); - $this->pdo->exec('CREATE TABLE ' . $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) . ' ( - keyword VARCHAR(255) NOT NULL, - toc_id INT(11) UNSIGNED NOT NULL, - type INT(11) UNSIGNED NOT NULL, - KEY (keyword(' . $keyLen . ')), - KEY (toc_id) - ) ENGINE=InnoDB CHARACTER SET ' . $charset); } } diff --git a/src/S2/Rose/Storage/Database/PdoStorage.php b/src/S2/Rose/Storage/Database/PdoStorage.php index 496ada1..7a4ec83 100644 --- a/src/S2/Rose/Storage/Database/PdoStorage.php +++ b/src/S2/Rose/Storage/Database/PdoStorage.php @@ -23,7 +23,6 @@ use S2\Rose\Storage\Exception\EmptyIndexException; use S2\Rose\Storage\Exception\InvalidEnvironmentException; use S2\Rose\Storage\FulltextIndexContent; -use S2\Rose\Storage\KeywordIndexContent; use S2\Rose\Storage\StorageEraseInterface; use S2\Rose\Storage\StorageReadInterface; use S2\Rose\Storage\StorageWriteInterface; @@ -70,60 +69,17 @@ public function erase(): void * @throws UnknownException * @throws InvalidEnvironmentException */ - public function fulltextResultByWords(array $words, $instanceId = null) + public function fulltextResultByWords(array $words, $instanceId = null): FulltextIndexContent { $result = new FulltextIndexContent(); - if (empty($words)) { + if (\count($words) === 0) { return $result; } - $data = $this->getRepository()->findFulltextByWords($words, $instanceId); + $generator = $this->getRepository()->findFulltextByWords($words, $instanceId); - foreach ($data as $row) { - $result->add($row['word'], $this->getExternalIdFromRow($row), $row['positions'], $row['word_count']); - } - - return $result; - } - - /** - * {@inheritdoc} - * @throws EmptyIndexException - * @throws UnknownException - * @throws InvalidEnvironmentException - */ - public function getSingleKeywordIndexByWords(array $words, $instanceId = null) - { - $data = $this->getRepository()->findSingleKeywordIndex($words, $instanceId); - $tocSize = $this->getTocSize($instanceId); - - /** @var KeywordIndexContent[]|array $result */ - $result = []; - foreach ($data as $row) { - if (!isset($result[$row['keyword']])) { - $result[$row['keyword']] = new KeywordIndexContent(); - } - - // TODO Making items unique seems to be a hack for caller. Rewrite indexing using INSERT IGNORE? @see \S2\Rose\Storage\KeywordIndexContent::add - $result[$row['keyword']]->add($this->getExternalIdFromRow($row), $row['type'], $tocSize, $row['usage_num']); - } - - return $result; - } - - /** - * {@inheritdoc} - * @throws UnknownException - * @throws EmptyIndexException - * @throws InvalidEnvironmentException - */ - public function getMultipleKeywordIndexByString($string, $instanceId = null) - { - $data = $this->getRepository()->findMultipleKeywordIndex($string, $instanceId); - - $result = new KeywordIndexContent(); - foreach ($data as $row) { - $result->add($this->getExternalIdFromRow($row), $row['type']); + foreach ($generator as $row) { + $result->add($row['word'], $this->getExternalIdFromRow($row), $row['title_positions'], $row['keyword_positions'], $row['content_positions'], $row['word_count']); } return $result; @@ -153,57 +109,27 @@ public function getSnippets(SnippetQuery $snippetQuery): SnippetResult * @throws UnknownIdException * @throws \S2\Rose\Exception\RuntimeException */ - public function addToFulltext(array $words, ExternalId $externalId) + public function addToFulltextIndex(array $titleWords, array $keywords, array $contentWords, ExternalId $externalId): void { - if (empty($words)) { + if (empty($contentWords)) { return; } $internalId = $this->getInternalIdFromExternalId($externalId); - $wordIds = $this->getWordIds($words); + $wordIds = $this->getWordIds(array_merge($contentWords, $titleWords, $keywords)); - $this->getRepository()->insertFulltext($words, $wordIds, $internalId); + $this->getRepository()->insertFulltext($titleWords, $keywords, $contentWords, $wordIds, $internalId); } /** * {@inheritdoc} */ - public function isExcluded($word) + public function isExcludedWord(string $word): bool { // Nothing is excluded in current DB storage implementation. return false; } - /** - * {@inheritdoc} - * - * @throws UnknownException - * @throws UnknownIdException - */ - public function addToSingleKeywordIndex($word, ExternalId $externalId, $type) - { - try { - $this->addKeywordToDb($word, $externalId, $type, MysqlRepository::KEYWORD_INDEX); - } catch (\PDOException $e) { - throw new UnknownException('Unknown exception occurred while single keyword indexing:' . $e->getMessage(), $e->getCode(), $e); - } - } - - /** - * {@inheritdoc} - * - * @throws UnknownException - * @throws UnknownIdException - */ - public function addToMultipleKeywordIndex($string, ExternalId $externalId, $type) - { - try { - $this->addKeywordToDb($string, $externalId, $type, MysqlRepository::KEYWORD_MULTIPLE_INDEX); - } catch (\PDOException $e) { - throw new UnknownException('Unknown exception occurred while multiple keyword indexing:' . $e->getMessage(), $e->getCode(), $e); - } - } - /** * {@inheritdoc} * @@ -239,7 +165,7 @@ public function addSnippets(ExternalId $externalId, SnippetSource ...$snippets): * @throws UnknownException * @throws InvalidEnvironmentException */ - public function removeFromIndex(ExternalId $externalId) + public function removeFromIndex(ExternalId $externalId): void { $this->getRepository()->removeFromIndex($externalId); } @@ -251,7 +177,7 @@ public function removeFromIndex(ExternalId $externalId) * @throws UnknownException * @throws InvalidEnvironmentException */ - public function addEntryToToc(TocEntry $entry, ExternalId $externalId) + public function addEntryToToc(TocEntry $entry, ExternalId $externalId): void { $this->getRepository()->addToToc($entry, $externalId); @@ -272,7 +198,7 @@ public function addEntryToToc(TocEntry $entry, ExternalId $externalId) * @throws UnknownException * @throws InvalidEnvironmentException */ - public function getTocByExternalIds(ExternalIdCollection $externalIds, $instanceId = null) + public function getTocByExternalIds(ExternalIdCollection $externalIds): array { $data = $this->getRepository()->getTocEntries(['ids' => $externalIds]); @@ -300,7 +226,7 @@ public function getTocByTitlePrefix($titlePrefix) * @throws UnknownException * @throws InvalidEnvironmentException */ - public function getTocByExternalId(ExternalId $externalId) + public function getTocByExternalId(ExternalId $externalId): ?TocEntry { $entries = $this->getTocByExternalIds(new ExternalIdCollection([$externalId])); @@ -314,7 +240,7 @@ public function getTocByExternalId(ExternalId $externalId) * @throws UnknownException * @throws InvalidEnvironmentException */ - public function getTocSize($instanceId) + public function getTocSize(?int $instanceId): int { return $this->getRepository()->getTocSize($instanceId); } @@ -326,7 +252,7 @@ public function getTocSize($instanceId) * @throws UnknownException * @throws InvalidEnvironmentException */ - public function removeFromToc(ExternalId $externalId) + public function removeFromToc(ExternalId $externalId): void { $this->getRepository()->removeFromToc($externalId); $this->mapping->remove($externalId); @@ -408,20 +334,19 @@ public function getIndexStat() } /** - * TODO move to another class + * TODO move to another class? * * @param string[] $words * - * @return int[] - * @throws EmptyIndexException + * @return int[]|array * @throws UnknownException * @throws \S2\Rose\Exception\RuntimeException */ - protected function getWordIds(array $words) + protected function getWordIds(array $words): array { $knownWords = []; $unknownWords = []; - foreach ($words as $k => $word) { + foreach ($words as $word) { if (isset($this->cachedWordIds[$word])) { $knownWords[$word] = $this->cachedWordIds[$word]; } else { @@ -460,21 +385,6 @@ protected function getWordIds(array $words) throw new LogicException('Inserted words not found. Unknown words: ' . var_export($unknownWords, true)); } - /** - * @param string $word - * @param ExternalId $externalId - * @param int $type - * @param string $tableKey - * - * @throws UnknownIdException - * @throws InvalidEnvironmentException - */ - private function addKeywordToDb($word, ExternalId $externalId, $type, $tableKey) - { - $internalId = $this->getInternalIdFromExternalId($externalId); - $this->getRepository()->insertKeywords([$word], $internalId, $type, $tableKey); - } - /** * @param ExternalId $externalId * diff --git a/src/S2/Rose/Storage/Database/PostgresRepository.php b/src/S2/Rose/Storage/Database/PostgresRepository.php index b89bc09..ae38eda 100644 --- a/src/S2/Rose/Storage/Database/PostgresRepository.php +++ b/src/S2/Rose/Storage/Database/PostgresRepository.php @@ -300,35 +300,5 @@ private function dropAndCreateTables(): void name VARCHAR(255) NOT NULL DEFAULT \'\', UNIQUE (name) )'); - - $this->pdo->exec('DROP TABLE IF EXISTS ' . $this->getTableName(self::KEYWORD_INDEX) . ';'); - $this->pdo->exec('CREATE TABLE ' . $this->getTableName(self::KEYWORD_INDEX) . ' ( - keyword VARCHAR(255) NOT NULL, - toc_id INT NOT NULL, - type INT NOT NULL - )'); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_toc_id ON %1$s (toc_id);', - $this->getTableName(self::KEYWORD_INDEX) - )); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_keyword ON %1$s (keyword);', - $this->getTableName(self::KEYWORD_INDEX) - )); - - $this->pdo->exec('DROP TABLE IF EXISTS ' . $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) . ';'); - $this->pdo->exec('CREATE TABLE ' . $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) . ' ( - keyword VARCHAR(255) NOT NULL, - toc_id INT NOT NULL, - type INT NOT NULL - )'); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_toc_id ON %1$s (toc_id);', - $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) - )); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_keyword ON %1$s (keyword);', - $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) - )); } } diff --git a/src/S2/Rose/Storage/Database/SqliteRepository.php b/src/S2/Rose/Storage/Database/SqliteRepository.php index 09776ce..29e9a6c 100644 --- a/src/S2/Rose/Storage/Database/SqliteRepository.php +++ b/src/S2/Rose/Storage/Database/SqliteRepository.php @@ -315,35 +315,5 @@ private function dropAndCreateTables(): void name VARCHAR(255) NOT NULL DEFAULT \'\', UNIQUE (name) )'); - - $this->pdo->exec('DROP TABLE IF EXISTS ' . $this->getTableName(self::KEYWORD_INDEX) . ';'); - $this->pdo->exec('CREATE TABLE ' . $this->getTableName(self::KEYWORD_INDEX) . ' ( - keyword VARCHAR(255) NOT NULL, - toc_id INTEGER NOT NULL, - type INTEGER NOT NULL - )'); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_toc_id ON %1$s (toc_id);', - $this->getTableName(self::KEYWORD_INDEX) - )); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_keyword ON %1$s (keyword);', - $this->getTableName(self::KEYWORD_INDEX) - )); - - $this->pdo->exec('DROP TABLE IF EXISTS ' . $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) . ';'); - $this->pdo->exec('CREATE TABLE ' . $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) . ' ( - keyword VARCHAR(255) NOT NULL, - toc_id INTEGER NOT NULL, - type INTEGER NOT NULL - )'); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_toc_id ON %1$s (toc_id);', - $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) - )); - $this->pdo->exec(sprintf( - 'CREATE INDEX idx_%1$s_keyword ON %1$s (keyword);', - $this->getTableName(self::KEYWORD_MULTIPLE_INDEX) - )); } } diff --git a/src/S2/Rose/Storage/FulltextIndexContent.php b/src/S2/Rose/Storage/FulltextIndexContent.php index 025a79a..2aeaa15 100644 --- a/src/S2/Rose/Storage/FulltextIndexContent.php +++ b/src/S2/Rose/Storage/FulltextIndexContent.php @@ -14,16 +14,20 @@ class FulltextIndexContent protected array $dataByWord = []; protected array $dataByExternalId = []; - public function add(string $word, ExternalId $externalId, array $positions, int $wordCount = 0): void + public function add(string $word, ExternalId $externalId, array $titlePositions, array $keywordPositions, array $contentPositions, int $wordCount = 0): void { $serializedExtId = $externalId->toString(); - $this->dataByExternalId[$serializedExtId][$word] = $positions; + if (\count($contentPositions) > 0) { + $this->dataByExternalId[$serializedExtId][$word] = $contentPositions; + } // TODO refactor this data transformation $this->dataByWord[$word][$serializedExtId]['extId'] = $externalId; $this->dataByWord[$word][$serializedExtId]['wordCount'] = $wordCount; - $this->dataByWord[$word][$serializedExtId]['pos'] = $positions; + $this->dataByWord[$word][$serializedExtId]['tpos'] = $titlePositions; + $this->dataByWord[$word][$serializedExtId]['kpos'] = $keywordPositions; + $this->dataByWord[$word][$serializedExtId]['pos'] = $contentPositions; } /** @@ -35,7 +39,7 @@ public function toArray(): array return $this->dataByWord; } - public function iterateWordPositions(\Closure $callback): void + public function iterateContentWordPositions(\Closure $callback): void { foreach ($this->dataByExternalId as $serializedExtId => $data) { $callback(ExternalId::fromString($serializedExtId), new WordPositionContainer($data)); diff --git a/src/S2/Rose/Storage/FulltextProxyInterface.php b/src/S2/Rose/Storage/FulltextProxyInterface.php index 2db4881..e918672 100644 --- a/src/S2/Rose/Storage/FulltextProxyInterface.php +++ b/src/S2/Rose/Storage/FulltextProxyInterface.php @@ -1,51 +1,28 @@ data[$externalId->toString() . Finder::TYPE_KEYWORD]); - } elseif (isset($this->data[$externalId->toString() . Finder::TYPE_TITLE])) { - // Do not overwrite with low priority - return $this; - } - - $this->data[$externalId->toString() . $type] = [$externalId, $type, $tocSize, $foundTocEntriesNum]; - - return $this; - } - - public function iterate(\Closure $callback): void - { - foreach ($this->data as $params) { - $callback(...$params); - } - } -} diff --git a/src/S2/Rose/Storage/StorageReadInterface.php b/src/S2/Rose/Storage/StorageReadInterface.php index 8e52f9d..5922976 100644 --- a/src/S2/Rose/Storage/StorageReadInterface.php +++ b/src/S2/Rose/Storage/StorageReadInterface.php @@ -15,48 +15,17 @@ interface StorageReadInterface { /** * @param string[] $words - * @param null $instanceId - * - * @return FulltextIndexContent */ - public function fulltextResultByWords(array $words, $instanceId = null); + public function fulltextResultByWords(array $words, ?int $instanceId): FulltextIndexContent; - /** - * @param string $word - * - * @return bool - */ - public function isExcluded($word); - - /** - * @param string[] $words - * @param int|null $instanceId - * - * @return array|KeywordIndexContent[] - */ - public function getSingleKeywordIndexByWords(array $words, $instanceId = null); + public function isExcludedWord(string $word): bool; /** - * @param string $string - * @param int|null $instanceId - * - * @return KeywordIndexContent - */ - public function getMultipleKeywordIndexByString($string, $instanceId = null); - - /** - * @param ExternalIdCollection $externalIds - * * @return TocEntryWithMetadata[] */ - public function getTocByExternalIds(ExternalIdCollection $externalIds); + public function getTocByExternalIds(ExternalIdCollection $externalIds): array; public function getSnippets(SnippetQuery $snippetQuery): SnippetResult; - /** - * @param int|null $instanceId - * - * @return int - */ - public function getTocSize($instanceId); + public function getTocSize(?int $instanceId): int; } diff --git a/src/S2/Rose/Storage/StorageWriteInterface.php b/src/S2/Rose/Storage/StorageWriteInterface.php index e842e48..fc5df41 100644 --- a/src/S2/Rose/Storage/StorageWriteInterface.php +++ b/src/S2/Rose/Storage/StorageWriteInterface.php @@ -14,56 +14,24 @@ interface StorageWriteInterface { /** - * @param array $words Keys are the positions of corresponding words. - * @param ExternalId $externalId + * @param array $titleWords Keys are the positions of corresponding words. + * @param array $keywords Keys are the positions of corresponding words. + * @param array $contentWords Keys are the positions of corresponding words. */ - public function addToFulltext(array $words, ExternalId $externalId); + public function addToFulltextIndex(array $titleWords, array $keywords, array $contentWords, ExternalId $externalId): void; - /** - * @param ExternalId $externalId - */ - public function removeFromIndex(ExternalId $externalId); - - /** - * @param string $word - * - * @return bool - */ - public function isExcluded($word); - - /** - * @param string $word - * @param ExternalId $externalId - * @param string $type - */ - public function addToSingleKeywordIndex($word, ExternalId $externalId, $type); + public function removeFromIndex(ExternalId $externalId): void; - /** - * @param string $string - * @param ExternalId $externalId - * @param string $type - */ - public function addToMultipleKeywordIndex($string, ExternalId $externalId, $type); + public function isExcludedWord(string $word): bool; - /** - * @param TocEntry $entry - * @param ExternalId $externalId - */ - public function addEntryToToc(TocEntry $entry, ExternalId $externalId); + public function addEntryToToc(TocEntry $entry, ExternalId $externalId): void; /** * TODO How can a read method be eliminated from the writer interface? - * - * @param ExternalId $externalId - * - * @return TocEntry */ - public function getTocByExternalId(ExternalId $externalId); + public function getTocByExternalId(ExternalId $externalId): ?TocEntry; - /** - * @param ExternalId $externalId - */ - public function removeFromToc(ExternalId $externalId); + public function removeFromToc(ExternalId $externalId): void; /** * Save some additional info about indexing items diff --git a/tests/unit/Rose/FinderTest.php b/tests/unit/Rose/FinderTest.php index f525aa5..aa4e898 100644 --- a/tests/unit/Rose/FinderTest.php +++ b/tests/unit/Rose/FinderTest.php @@ -1,7 +1,7 @@ static function ($word) { - return new KeywordIndexContent(); - }, 'getTocSize' => static function () { return 30; }, @@ -43,35 +39,37 @@ public function testIgnoreFrequentWordsInFulltext(): void $result = new FulltextIndexContent(); foreach ($words as $k => $word) { if ($word === 'find') { - $result->add($word, new ExternalId('id_3'), [1]); - $result->add($word, new ExternalId('id_2'), [10, 20]); + $result->add($word, new ExternalId('id_3'), [], [], [1]); + $result->add($word, new ExternalId('id_2'), [], [1], [10, 20]); + $result->add($word, new ExternalId('id_1'), [1], [], []); } if ($word === 'and') { - $result->add($word, new ExternalId('id_1'), [4, 8]); - $result->add($word, new ExternalId('id_2'), [7, 11, 34]); - $result->add($word, new ExternalId('id_3'), [28, 65]); - $result->add($word, new ExternalId('id_4'), [45, 9]); + $result->add($word, new ExternalId('id_1'), [], [], [4, 8]); + $result->add($word, new ExternalId('id_2'), [], [], [7, 11, 34]); + $result->add($word, new ExternalId('id_3'), [], [], [28, 65]); + $result->add($word, new ExternalId('id_4'), [], [], [45, 9]); - $result->add($word, new ExternalId('id_5'), [1]); - $result->add($word, new ExternalId('id_6'), [1]); - $result->add($word, new ExternalId('id_7'), [1]); - $result->add($word, new ExternalId('id_8'), [1]); - $result->add($word, new ExternalId('id_9'), [1]); - $result->add($word, new ExternalId('id_10'), [1]); - $result->add($word, new ExternalId('id_11'), [1]); - $result->add($word, new ExternalId('id_12'), [1]); - $result->add($word, new ExternalId('id_13'), [1]); - $result->add($word, new ExternalId('id_14'), [1]); - $result->add($word, new ExternalId('id_15'), [1]); - $result->add($word, new ExternalId('id_16'), [1]); - $result->add($word, new ExternalId('id_17'), [1]); - $result->add($word, new ExternalId('id_18'), [1]); - $result->add($word, new ExternalId('id_19'), [1]); - $result->add($word, new ExternalId('id_20'), [1]); - $result->add($word, new ExternalId('id_21'), [1]); + $result->add($word, new ExternalId('id_5'), [], [], [1]); + $result->add($word, new ExternalId('id_6'), [], [], [1]); + $result->add($word, new ExternalId('id_7'), [], [], [1]); + $result->add($word, new ExternalId('id_8'), [], [], [1]); + $result->add($word, new ExternalId('id_9'), [], [], [1]); + $result->add($word, new ExternalId('id_10'), [], [], [1]); + $result->add($word, new ExternalId('id_11'), [], [], [1]); + $result->add($word, new ExternalId('id_12'), [], [], [1]); + $result->add($word, new ExternalId('id_13'), [], [], [1]); + $result->add($word, new ExternalId('id_14'), [], [], [1]); + $result->add($word, new ExternalId('id_15'), [], [], [1]); + $result->add($word, new ExternalId('id_16'), [], [], [1]); + $result->add($word, new ExternalId('id_17'), [], [], [1]); + $result->add($word, new ExternalId('id_18'), [], [], [1]); + $result->add($word, new ExternalId('id_19'), [], [], [1]); + $result->add($word, new ExternalId('id_20'), [], [], [1]); + $result->add($word, new ExternalId('id_21'), [], [], [1]); } if ($word === 'replace') { - $result->add($word, new ExternalId('id_2'), [12]); + $result->add($word, new ExternalId('id_2'), [], [], [12]); + $result->add($word, new ExternalId('id_1'), [1], [], []); } unset($words[$k]); @@ -83,22 +81,6 @@ public function testIgnoreFrequentWordsInFulltext(): void return $result; }, - 'getSingleKeywordIndexByWords' => static function ($words) { - $result = []; - foreach ($words as $word) { - if ($word === 'find') { - $result[$word] = (new KeywordIndexContent())->add(new ExternalId('id_1'), 1)->add(new ExternalId('id_2'), 2); - } elseif ($word === 'and') { - $result[$word] = new KeywordIndexContent(); - } elseif ($word === 'replace') { - $result[$word] = (new KeywordIndexContent())->add(new ExternalId('id_1'), 1); - } else { - throw new \RuntimeException(sprintf('Unknown word "%s" in StorageReadInterface stub.', $word)); - } - } - - return $result; - }, 'getTocByExternalIds' => static function (ExternalIdCollection $ids) { return array_map(static function (ExternalId $id) { return new TocEntryWithMetadata( diff --git a/tests/unit/Rose/IntegrationTest.php b/tests/unit/Rose/IntegrationTest.php index b7f5efb..b243da6 100644 --- a/tests/unit/Rose/IntegrationTest.php +++ b/tests/unit/Rose/IntegrationTest.php @@ -166,7 +166,7 @@ public function testFeatures( 'Русский текст. Красным заголовком', $resultItems4[0]->getHighlightedTitle($stemmer) ); - $this->assertEquals(44.89947331141201, $resultSet4->getItems()[0]->getRelevance()); + $this->assertEquals(50.9482683735414, $resultSet4->getItems()[0]->getRelevance()); // Query 5 $resultSet5 = $finder->find(new Query('русский')); diff --git a/tests/unit/Rose/Storage/PdoStorageTest.php b/tests/unit/Rose/Storage/PdoStorageTest.php index 7d2bbb9..27e9884 100644 --- a/tests/unit/Rose/Storage/PdoStorageTest.php +++ b/tests/unit/Rose/Storage/PdoStorageTest.php @@ -65,8 +65,8 @@ public function testStorage() $tocEntry2 = new TocEntry('', '', new \DateTime('2014-05-28'), '', 1, 'pokjhgtyuio'); $storage->addEntryToToc($tocEntry2, $externalId2); - $storage->addToFulltext([1 => 'word1', 2 => 'word2'], $externalId1); - $storage->addToFulltext([1 => 'word2', 10 => 'word2'], $externalId2); + $storage->addToFulltextIndex([1 => 'word1', 2 => 'word2'], $externalId1); + $storage->addToFulltextIndex([1 => 'word2', 10 => 'word2'], $externalId2); $stat = $storage->getIndexStat(); $this->assertGreaterThan(0, $stat['bytes']); @@ -232,8 +232,8 @@ public function testDiacritic() new TocEntry('title 1', 'descr 1', new \DateTime('2014-05-28'), '', 1, '123456789'), new ExternalId('id_1') ); - $storage->addToFulltext(['Flugel', 'Shlomo', 'Tormented'], new ExternalId('id_1')); - $storage->addToFulltext(['Flügel', 'Shlømo', 'Tørmented'], new ExternalId('id_1')); + $storage->addToFulltextIndex(['Flugel', 'Shlomo', 'Tormented'], new ExternalId('id_1')); + $storage->addToFulltextIndex(['Flügel', 'Shlømo', 'Tørmented'], new ExternalId('id_1')); } public function testLongWords() @@ -251,29 +251,29 @@ public function testLongWords() new ExternalId('id_2') ); - $storage->addToFulltext(['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); - $storage->addToFulltext(['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); + $storage->addToFulltextIndex(['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); + $storage->addToFulltextIndex(['word', 'iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8cim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); - $storage->addToFulltext(['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); - $storage->addToFulltext(['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); + $storage->addToFulltextIndex(['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_1')); + $storage->addToFulltextIndex(['word2', '9siufiai279837jz972q39z78qao298m3apq8n9283j298cnq08498908ks09809r8mc9o90q7808sdolfjlis39w8kso0sdu87j934797239478o7o3j4d573p985jkdx37oc8so89o3849os8l948o9l8884iu9304809n87908p08309xm8938noue09x78349c7m3098kx09237498xn89738j9457xp98q754891209834xm928349o7978x94987n89o7908x98984390n2cj347x89793857c9879oxieru9084920x83497nm37nosaujwaeuj034iroefjj98r3epw8is8ajpk9xox8jo9834k0ax8k4r9o8wk9o38rmoc8mo95m8co83km898madkjflikjiuroiuiweru0198390u90qu0p98784kqz8p94xco8mcim9or8439urno9eufoluia039480pifou93'], new ExternalId('id_2')); - $storage->addToFulltext(['word21', + $storage->addToFulltextIndex(['word21', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt0', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt1', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt1', ], new ExternalId('id_1')); - $storage->addToFulltext(['word21', + $storage->addToFulltextIndex(['word21', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt0', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt0', 'wwjfau8wmtbmse9uvlr2ynrlkzlvdhe3mvgytjvls1jkvm1qmjvnk2jsbeYxcvznqk9or3a4vu1luzzwy1lcevltndvsskfmufnsrwt3cxduYytxqlhnyk5bbnvozkttmujlrwqyawxfexrsr1zcvzvnsgjru0tlvvr0mjryr2hxbxpaznrnywrzv0vnbmpjdja5t1rzzefkallmmevysva3zkv3cju5dvvazjbmaju5bdixvkewbuqvsuvywgdqatc5wejyt2tvntvswwx1tezhqxb1l3dkuxl5awpoqllev245vstiajfddxphwfqxvgvpegjdv3jseu9lbe1vqmxhrklla3bsrm9xukntakirwxldc3i5zjdzoggwymplmfpgrgrxkzg3qtjfsgpknwh5rmdxzzhptxvvtuv5sfznm2dznhvqwkjratlwdmhkcleYnvndshjsvkzzevpbagc1zmq0nlhlsg43ynvhruvdl0zmuhvielnhrkrzsvfyls05ukjqm24ym0d4bjfbrwfvqjlyszjnpt1', ], new ExternalId('id_2')); - $storage->addToFulltext(['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_1')); - $storage->addToFulltext(['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_2')); + $storage->addToFulltextIndex(['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_1')); + $storage->addToFulltextIndex(['word3', '1' . str_repeat('ю', 200)], new ExternalId('id_2')); - $storage->addToFulltext(['word4', '1' . str_repeat('я', 255)], new ExternalId('id_1')); - $storage->addToFulltext(['word4', '1' . str_repeat('я', 255)], new ExternalId('id_2')); + $storage->addToFulltextIndex(['word4', '1' . str_repeat('я', 255)], new ExternalId('id_1')); + $storage->addToFulltextIndex(['word4', '1' . str_repeat('я', 255)], new ExternalId('id_2')); } public function testParallelAddingInTransactions(): void @@ -302,7 +302,7 @@ public function testParallelAddingInTransactions(): void new TocEntry('title 1', 'descr 1', new \DateTime('2014-05-28'), '', 1, '123456789'), new ExternalId('id_1') ); - $storage->addToFulltext(['word1', 'word2', 'word3'], new ExternalId('id_1')); + $storage->addToFulltextIndex(['word1', 'word2', 'word3'], new ExternalId('id_1')); $storage2 = new PdoStorage($pdo2, 'test_tr_'); $storage2->startTransaction(); @@ -319,7 +319,7 @@ public function testParallelAddingInTransactions(): void if ($driverName !== 'sqlite') { $this->expectException(RuntimeException::class); $this->expectExceptionMessage('Cannot insert words. Possible deadlock?'); - $storage2->addToFulltext(['word1', 'word5'], new ExternalId('id_2')); + $storage2->addToFulltextIndex(['word1', 'word5'], new ExternalId('id_2')); // $storage2->commitTransaction(); // // $storage->addToFulltext(['word4', 'word5', 'word6'], new ExternalId('id_1')); @@ -369,7 +369,7 @@ public function testBrokenDb() new ExternalId('id_1') ); - $this->pdo->exec('DROP TABLE test_keyword_multiple_index;'); + $this->pdo->exec('DROP TABLE test_fulltext_index;'); $storage->removeFromIndex(new ExternalId('id_1')); } @@ -399,7 +399,7 @@ public function testNonExistentDbAddToFulltext() { $this->expectException(UnknownIdException::class); $storage = new PdoStorage($this->pdo, 'non_existent_'); - $storage->addToFulltext(['word'], new ExternalId('id_1')); + $storage->addToFulltextIndex(['word'], new ExternalId('id_1')); } public function testNonExistentDbAddToSingleKeywordIndex() diff --git a/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php b/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php index ebaccfd..0a75851 100644 --- a/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php +++ b/tests/unit/Rose/Storage/SingleFileArrayStorageTest.php @@ -50,7 +50,7 @@ public function testStorage() $this->assertEquals(1, $entry1->getInternalId()); $this->assertEquals(2, $entry2->getInternalId()); - $storage->addToFulltext([1 => 'hello', 2 => 'world'], new ExternalId('test_id_1')); + $storage->addToFulltextIndex([1 => 'hello', 2 => 'world'], new ExternalId('test_id_1')); $fulltextResult = $storage->fulltextResultByWords(['hello']); $info = $fulltextResult->toArray()['hello']; @@ -74,7 +74,7 @@ public function testStorage() $entry3 = $storage->getTocByExternalId(new ExternalId('test_id_3')); $this->assertNull($entry3); - $storage->addToFulltext([10 => 'hello', 20 => 'world'], new ExternalId('test_id_2')); + $storage->addToFulltextIndex([10 => 'hello', 20 => 'world'], new ExternalId('test_id_2')); $fulltextResult = $storage->fulltextResultByWords(['world']); $info = $fulltextResult->toArray()['world'];