diff --git a/ChangeLog b/ChangeLog index 147b569e..9c521de8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,8 @@ ChangeLog Upcoming version +[!!!] The generation of tags has been updated. If a tag is auto-generated e.g. from a category title, it will now be changed if it does not meet the requirements (length, allowed characters, not in list of stop words). If it is too short or is in the list of stop words, the character "A" will be appended automatically. That may affect your filters if you are using tags that don't meet the requirements. In that case re-index and check if the tags in filter options match the tags in the index. +[FEATURE] Make sure stop words are not used as tags, https://github.com/tpwd/ke_search/issues/81 [TASK] Update default pagination template, add first and last page, thanks to Stephan Bauer, https://github.com/tpwd/ke_search/issues/219 and https://github.com/tpwd/ke_search/pull/218 [TASK] Add "ignorePageTypeRestriction" to TCA configs, allow records on default pages and the root page in TYPO3 v12, thanks to Dimitri König, https://github.com/tpwd/ke_search/pull/212 diff --git a/Classes/Lib/SearchHelper.php b/Classes/Lib/SearchHelper.php index 2f449b66..6c90fa70 100644 --- a/Classes/Lib/SearchHelper.php +++ b/Classes/Lib/SearchHelper.php @@ -22,6 +22,7 @@ use Psr\Http\Message\ServerRequestInterface; use Tpwd\KeSearch\Domain\Repository\CategoryRepository; +use Tpwd\KeSearch\Utility\StopWordUtility; use TYPO3\CMS\Core\Configuration\Exception\ExtensionConfigurationExtensionNotConfiguredException; use TYPO3\CMS\Core\Configuration\Exception\ExtensionConfigurationPathDoesNotExistException; use TYPO3\CMS\Core\Configuration\ExtensionConfiguration; @@ -175,27 +176,55 @@ public static function addTag(string $tagToAdd, $tags = '') /** * creates tags from an array of strings - * removes non-alphanumeric characters * * @param string|null $tags comma-list of tags, new tags will be added to this - * @param array $tagTitles Array of Titles (eg. 
categories) + * @param array $tagTitles Array of Titles (e.g. categories) */ public static function makeTags(&$tags, array $tagTitles) { if (is_array($tagTitles) && count($tagTitles)) { $tags = $tags ?? ''; - $extConf = SearchHelper::getExtConf(); - foreach ($tagTitles as $title) { - $tag = preg_replace('/[^A-Za-z0-9]/', '', $title); if (!empty($tags)) { $tags .= ','; } - $tags .= $extConf['prePostTagChar'] . $tag . $extConf['prePostTagChar']; + $tags .= self::makeTag($title); } } } + /** + * Creates a tag like #colorblue# from a string. + * Removes not allowed characters, extends it if the tag is too short and changes it if + * it is in the list of stop words. + * + * @param string $tag + * @param bool $addPrePostTagChar If true, the tag will be surrounded with "#" (or the configured 'prePostTagChar' character) + * @return string + */ + public static function makeTag(string $tag, bool $addPrePostTagChar = true): string + { + $extConf = SearchHelper::getExtConf(); + + // Remove not allowed characters + $tag = preg_replace('/[^A-Za-z0-9]/', '', $tag); + + // Fill up the tag if it is too short + $minLength = isset($extConf['searchWordLength']) ? (int)$extConf['searchWordLength'] : 4; + $tag = str_pad($tag, $minLength, 'A'); + + // Check if tag is in the list of stop words and append a character if so + if (in_array($tag, StopWordUtility::getStopWords())) { + $tag .= 'A'; + } + + if ($addPrePostTagChar) { + $tag = $extConf['prePostTagChar'] . $tag . $extConf['prePostTagChar']; + } + + return $tag; + } + /** * finds the system categories for $uid in $tablename, creates * tags like "syscat123" ("syscat" + category uid). 
diff --git a/Classes/UserFunction/CustomFieldValidation/FilterOptionTagValidator.php b/Classes/UserFunction/CustomFieldValidation/FilterOptionTagValidator.php index 17b70033..a1f6b31c 100644 --- a/Classes/UserFunction/CustomFieldValidation/FilterOptionTagValidator.php +++ b/Classes/UserFunction/CustomFieldValidation/FilterOptionTagValidator.php @@ -25,7 +25,7 @@ namespace Tpwd\KeSearch\UserFunction\CustomFieldValidation; -use TYPO3\CMS\Core\Configuration\ExtensionConfiguration; +use Tpwd\KeSearch\Lib\SearchHelper; use TYPO3\CMS\Core\Information\Typo3Version; use TYPO3\CMS\Core\Messaging\AbstractMessage; use TYPO3\CMS\Core\Messaging\FlashMessage; @@ -35,34 +35,33 @@ use TYPO3\CMS\Extbase\Utility\LocalizationUtility; /** - * Validates given filter option tag. Checks length, which may not smaller than - * basic.searchWordLength extension (and MySQL) setting. + * Validates given filter option tag by applying SearchHelper::makeTag(). + * Checks length, which may not be smaller than basic.searchWordLength extension (and MySQL) setting. + * Checks if it contains not allowed characters. + * Checks if it is in the list of stop words. */ class FilterOptionTagValidator { /** - * PHP Validation to disallow leading numbers - * * @param string $value - * @return mixed|string Updated string, which fits the requirements + * @return string Updated string, which fits the requirements */ - public function evaluateFieldValue($value) + public function evaluateFieldValue(string $value): string { - $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('ke_search'); - $minLength = isset($extConf['searchWordLength']) ? 
(int)$extConf['searchWordLength'] : 4; + $tag = SearchHelper::makeTag($value, false); - if (strlen($value) < $minLength) { + if ($value != $tag) { if (GeneralUtility::makeInstance(Typo3Version::class)->getMajorVersion() < 12) { // @extensionScannerIgnoreLine - $severity = AbstractMessage::ERROR; + $severity = AbstractMessage::INFO; } else { - $severity = ContextualFeedbackSeverity::ERROR; + $severity = ContextualFeedbackSeverity::INFO; } /** @var FlashMessage $message */ $message = GeneralUtility::makeInstance( FlashMessage::class, - $this->translate('tag_too_short_message', [$value, $minLength]), - $this->translate('tag_too_short'), + $this->translate('tag_has_been_updated_message', [$value, $tag]), + $this->translate('tag_has_been_updated'), $severity, true ); @@ -71,9 +70,9 @@ public function evaluateFieldValue($value) $flashMessageService = GeneralUtility::makeInstance(FlashMessageService::class); // @extensionScannerIgnoreLine $flashMessageService->getMessageQueueByIdentifier()->addMessage($message); - return false; } - return $value; + + return $tag; } /** diff --git a/Classes/Utility/StopWordUtility.php b/Classes/Utility/StopWordUtility.php new file mode 100644 index 00000000..34a22780 --- /dev/null +++ b/Classes/Utility/StopWordUtility.php @@ -0,0 +1,576 @@ +getConnectionForTable('pages'); + $statement = $connection->prepare("SHOW VARIABLES LIKE 'ft_stopword_file';"); + $resultRows = $statement->executeQuery()->fetchAllAssociative(); + + // Check if the built-in stopword list is used + if (isset($resultRows[0]['Variable_name']) && $resultRows[0]['Variable_name'] == 'ft_stopword_file' + && isset($resultRows[0]['Value']) && $resultRows[0]['Value'] == '(built-in)' + ) { + return self::MYISAM_STOP_WORDS; + } + return []; + } + + public function calculateTimeBetweenDate() + { + } + + // Calculate time between dates +} diff --git a/Configuration/TCA/tx_kesearch_filteroptions.php b/Configuration/TCA/tx_kesearch_filteroptions.php index fd9cc861..392e47bc 100644 --- 
a/Configuration/TCA/tx_kesearch_filteroptions.php +++ b/Configuration/TCA/tx_kesearch_filteroptions.php @@ -89,7 +89,7 @@ 'config' => [ 'type' => 'input', 'size' => '30', - 'eval' => 'trim,alphanum,Tpwd\KeSearch\UserFunction\CustomFieldValidation\FilterOptionTagValidator', + 'eval' => 'trim,alphanum,' . \Tpwd\KeSearch\UserFunction\CustomFieldValidation\FilterOptionTagValidator::class, 'required' => true, ], ], diff --git a/Resources/Private/Language/locallang_mod.xlf b/Resources/Private/Language/locallang_mod.xlf index 01368225..e1506f91 100644 --- a/Resources/Private/Language/locallang_mod.xlf +++ b/Resources/Private/Language/locallang_mod.xlf @@ -60,12 +60,12 @@ Indexer configurations have been found: - - Tag too short + + Tag has been updated - - The given tag "%s" is too short. A minimum length of %d is required. See extension setting "searchWordLength" for further info. + + The given tag "%s" has been updated to "%s" because it did not match the criteria for a tag (length, allowed characters, not in list of stop words). - \ No newline at end of file + diff --git a/ext_localconf.php b/ext_localconf.php index b82a7f91..a980c5bd 100644 --- a/ext_localconf.php +++ b/ext_localconf.php @@ -1,5 +1,7 @@