diff --git a/classes/GravTNTSearch.php b/classes/GravTNTSearch.php index 864856d..73203ff 100644 --- a/classes/GravTNTSearch.php +++ b/classes/GravTNTSearch.php @@ -214,7 +214,13 @@ public static function getCleanContent($page) $content = strip_tags($content); $content = preg_replace(['/[ \t]+/', '/\s*$^\s*/m'], [' ', "\n"], $content) ?? $content; - + // 2023-11-21 MCH - Remove some in-word Unicode that regularly breaks searches + $problematic = [ + '/­/i', '/­/', '/­/i', '/\x{00AD}/u', // soft hyphen + '/‍/i', '/‍/', '/‍/i', '/\x{200D}/u', // zero-width joiner + '/‌/i', '/‌/', '/‌/i', '/\x{200C}/u', // zero-width non-joiner + ]; + $content = preg_replace($problematic, '', $content) ?? $content; // Restore active page in Grav. unset($grav['page']); $grav['page'] = $activePage;