From 040f822fb506889d41adcfa0662b35753ae82689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=BDubom=C3=ADr=20Samotn=C3=BD?= Date: Tue, 16 Dec 2025 16:31:02 +0100 Subject: [PATCH 1/2] triv(service): add custom transliterate fallback if php intl not enabled in env --- lib/RaiAcceptService.php | 65 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/lib/RaiAcceptService.php b/lib/RaiAcceptService.php index 44d7a98..15ed32a 100644 --- a/lib/RaiAcceptService.php +++ b/lib/RaiAcceptService.php @@ -13,10 +13,71 @@ class RaiAcceptService public static function transliterate(string $string) { - $string = transliterator_transliterate('Any-Latin; Latin-ASCII', $string); - return $string; + if (function_exists('transliterator_transliterate')) { + $out = transliterator_transliterate('Any-Latin', $string); + if (is_string($out)) { + return $out; + } + } + + $string = self::transliterateNonLatinFallback($string); + $string = preg_replace('/[^\p{Latin}\p{N}\s\.\,\-\'\/@]/u', '', $string) ?? $string; + + // Normalize whitespace + $string = preg_replace('/\s+/u', ' ', $string) ?? 
$string; + + return trim($string); } + private static function transliterateNonLatinFallback(string $s): string + { + static $map = [ + // Greek + 'Α'=>'A','α'=>'a','Β'=>'B','β'=>'b','Γ'=>'G','γ'=>'g','Δ'=>'D','δ'=>'d', + 'Ε'=>'E','ε'=>'e','Ζ'=>'Z','ζ'=>'z','Η'=>'E','η'=>'e','Θ'=>'Th','θ'=>'th', + 'Ι'=>'I','ι'=>'i','Κ'=>'K','κ'=>'k','Λ'=>'L','λ'=>'l','Μ'=>'M','μ'=>'m', + 'Ν'=>'N','ν'=>'n','Ξ'=>'X','ξ'=>'x','Ο'=>'O','ο'=>'o','Π'=>'P','π'=>'p', + 'Ρ'=>'R','ρ'=>'r','Σ'=>'S','σ'=>'s','ς'=>'s','Τ'=>'T','τ'=>'t','Υ'=>'Y','υ'=>'y', + 'Φ'=>'Ph','φ'=>'ph','Χ'=>'Ch','χ'=>'ch','Ψ'=>'Ps','ψ'=>'ps','Ω'=>'O','ω'=>'o', + + // Arabic (basic mapping for common names) + 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'j','ح'=>'h','خ'=>'kh','د'=>'d', + 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s','ض'=>'d','ط'=>'t', + 'ظ'=>'z','ع'=>'a','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k','ل'=>'l','م'=>'m', + 'ن'=>'n','ه'=>'h','و'=>'w','ي'=>'y','ء'=>'','آ'=>'a','أ'=>'a','إ'=>'i', + 'ى'=>'a','ة'=>'h', + + // Hebrew + 'א'=>'','ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'ch', + 'ט'=>'t','י'=>'y','כ'=>'k','ך'=>'k','ל'=>'l','מ'=>'m','ם'=>'m','נ'=>'n', + 'ן'=>'n','ס'=>'s','ע'=>'','פ'=>'p','ף'=>'p','צ'=>'ts','ץ'=>'ts','ק'=>'k', + 'ר'=>'r','ש'=>'sh','ת'=>'t', + + // Cyrillic + 'А'=>'A','а'=>'a','Б'=>'B','б'=>'b','В'=>'V','в'=>'v','Г'=>'G','г'=>'g', + 'Д'=>'D','д'=>'d','Е'=>'E','е'=>'e','Ё'=>'E','ё'=>'e','Ж'=>'Z','ж'=>'z', + 'З'=>'Z','з'=>'z','И'=>'I','и'=>'i','Й'=>'J','й'=>'j','К'=>'K','к'=>'k', + 'Л'=>'L','л'=>'l','М'=>'M','м'=>'m','Н'=>'N','н'=>'n','О'=>'O','о'=>'o', + 'П'=>'P','п'=>'p','Р'=>'R','р'=>'r','С'=>'S','с'=>'s','Т'=>'T','т'=>'t', + 'У'=>'U','у'=>'u','Ф'=>'F','ф'=>'f','Х'=>'H','х'=>'h','Ц'=>'C','ц'=>'c', + 'Ч'=>'Ch','ч'=>'ch','Ш'=>'Sh','ш'=>'sh','Щ'=>'Sch','щ'=>'sch','Ъ'=>'','ъ'=>'', + 'Ы'=>'Y','ы'=>'y','Ь'=>'','ь'=>'','Э'=>'E','э'=>'e','Ю'=>'Yu','ю'=>'yu', + 'Я'=>'Ya','я'=>'ya', + + // Ukrainian + 'Є'=>'Ye','є'=>'ye','І'=>'I','і'=>'i','Ї'=>'Yi','ї'=>'yi','Ґ'=>'G','ґ'=>'g', + + 
// Belarusian + 'Ў'=>'U','ў'=>'u', + + // Serbian / Macedonian extras + 'Ђ'=>'Dj','ђ'=>'dj','Љ'=>'Lj','љ'=>'lj','Њ'=>'Nj','њ'=>'nj','Ћ'=>'C','ћ'=>'c', + 'Џ'=>'Dz','џ'=>'dz', + ]; + + return strtr($s, $map); + } + public static function transliterate_and_limit_length(string $string, int $limit = 127) { $string = self::transliterate($string); From 8938648e24076e8bf88e06cf2380aeefed087c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=BDubom=C3=ADr=20Samotn=C3=BD?= Date: Tue, 16 Dec 2025 17:22:57 +0100 Subject: [PATCH 2/2] triv: improve performance of new transliterate map --- lib/RaiAcceptService.php | 99 ++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/lib/RaiAcceptService.php b/lib/RaiAcceptService.php index 15ed32a..e82e402 100644 --- a/lib/RaiAcceptService.php +++ b/lib/RaiAcceptService.php @@ -11,6 +11,54 @@ class RaiAcceptService public const STATUS_CANCELED = "CANCELED"; public const STATUS_ABANDONED = "ABANDONED"; + /** + * Transliteration map for non-Latin characters to Latin equivalents + * Initialized once when class is loaded for optimal performance + */ + private static array $transliterationMap = [ + // Greek + 'Α'=>'A','α'=>'a','Β'=>'B','β'=>'b','Γ'=>'G','γ'=>'g','Δ'=>'D','δ'=>'d', + 'Ε'=>'E','ε'=>'e','Ζ'=>'Z','ζ'=>'z','Η'=>'E','η'=>'e','Θ'=>'Th','θ'=>'th', + 'Ι'=>'I','ι'=>'i','Κ'=>'K','κ'=>'k','Λ'=>'L','λ'=>'l','Μ'=>'M','μ'=>'m', + 'Ν'=>'N','ν'=>'n','Ξ'=>'X','ξ'=>'x','Ο'=>'O','ο'=>'o','Π'=>'P','π'=>'p', + 'Ρ'=>'R','ρ'=>'r','Σ'=>'S','σ'=>'s','ς'=>'s','Τ'=>'T','τ'=>'t','Υ'=>'Y','υ'=>'y', + 'Φ'=>'Ph','φ'=>'ph','Χ'=>'Ch','χ'=>'ch','Ψ'=>'Ps','ψ'=>'ps','Ω'=>'O','ω'=>'o', + + // Arabic (basic mapping for common names) + 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'j','ح'=>'h','خ'=>'kh','د'=>'d', + 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s','ض'=>'d','ط'=>'t', + 'ظ'=>'z','ع'=>'a','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k','ل'=>'l','م'=>'m', + 
'ن'=>'n','ه'=>'h','و'=>'w','ي'=>'y','ء'=>'','آ'=>'a','أ'=>'a','إ'=>'i', + 'ى'=>'a','ة'=>'h', + + // Hebrew + 'א'=>'','ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'ch', + 'ט'=>'t','י'=>'y','כ'=>'k','ך'=>'k','ל'=>'l','מ'=>'m','ם'=>'m','נ'=>'n', + 'ן'=>'n','ס'=>'s','ע'=>'','פ'=>'p','ף'=>'p','צ'=>'ts','ץ'=>'ts','ק'=>'k', + 'ר'=>'r','ש'=>'sh','ת'=>'t', + + // Cyrillic + 'А'=>'A','а'=>'a','Б'=>'B','б'=>'b','В'=>'V','в'=>'v','Г'=>'G','г'=>'g', + 'Д'=>'D','д'=>'d','Е'=>'E','е'=>'e','Ё'=>'E','ё'=>'e','Ж'=>'Z','ж'=>'z', + 'З'=>'Z','з'=>'z','И'=>'I','и'=>'i','Й'=>'J','й'=>'j','К'=>'K','к'=>'k', + 'Л'=>'L','л'=>'l','М'=>'M','м'=>'m','Н'=>'N','н'=>'n','О'=>'O','о'=>'o', + 'П'=>'P','п'=>'p','Р'=>'R','р'=>'r','С'=>'S','с'=>'s','Т'=>'T','т'=>'t', + 'У'=>'U','у'=>'u','Ф'=>'F','ф'=>'f','Х'=>'H','х'=>'h','Ц'=>'C','ц'=>'c', + 'Ч'=>'Ch','ч'=>'ch','Ш'=>'Sh','ш'=>'sh','Щ'=>'Sch','щ'=>'sch','Ъ'=>'','ъ'=>'', + 'Ы'=>'Y','ы'=>'y','Ь'=>'','ь'=>'','Э'=>'E','э'=>'e','Ю'=>'Yu','ю'=>'yu', + 'Я'=>'Ya','я'=>'ya', + + // Ukrainian + 'Є'=>'Ye','є'=>'ye','І'=>'I','і'=>'i','Ї'=>'Yi','ї'=>'yi','Ґ'=>'G','ґ'=>'g', + + // Belarusian + 'Ў'=>'U','ў'=>'u', + + // Serbian / Macedonian extras + 'Ђ'=>'Dj','ђ'=>'dj','Љ'=>'Lj','љ'=>'lj','Њ'=>'Nj','њ'=>'nj','Ћ'=>'C','ћ'=>'c', + 'Џ'=>'Dz','џ'=>'dz', + ]; + public static function transliterate(string $string) { if (function_exists('transliterator_transliterate')) { @@ -31,51 +79,12 @@ public static function transliterate(string $string) private static function transliterateNonLatinFallback(string $s): string { - static $map = [ - // Greek - 'Α'=>'A','α'=>'a','Β'=>'B','β'=>'b','Γ'=>'G','γ'=>'g','Δ'=>'D','δ'=>'d', - 'Ε'=>'E','ε'=>'e','Ζ'=>'Z','ζ'=>'z','Η'=>'E','η'=>'e','Θ'=>'Th','θ'=>'th', - 'Ι'=>'I','ι'=>'i','Κ'=>'K','κ'=>'k','Λ'=>'L','λ'=>'l','Μ'=>'M','μ'=>'m', - 'Ν'=>'N','ν'=>'n','Ξ'=>'X','ξ'=>'x','Ο'=>'O','ο'=>'o','Π'=>'P','π'=>'p', - 'Ρ'=>'R','ρ'=>'r','Σ'=>'S','σ'=>'s','ς'=>'s','Τ'=>'T','τ'=>'t','Υ'=>'Y','υ'=>'y', - 
'Φ'=>'Ph','φ'=>'ph','Χ'=>'Ch','χ'=>'ch','Ψ'=>'Ps','ψ'=>'ps','Ω'=>'O','ω'=>'o', - - // Arabic (basic mapping for common names) - 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'j','ح'=>'h','خ'=>'kh','د'=>'d', - 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s','ض'=>'d','ط'=>'t', - 'ظ'=>'z','ع'=>'a','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k','ل'=>'l','م'=>'m', - 'ن'=>'n','ه'=>'h','و'=>'w','ي'=>'y','ء'=>'','آ'=>'a','أ'=>'a','إ'=>'i', - 'ى'=>'a','ة'=>'h', - - // Hebrew - 'א'=>'','ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'ch', - 'ט'=>'t','י'=>'y','כ'=>'k','ך'=>'k','ל'=>'l','מ'=>'m','ם'=>'m','נ'=>'n', - 'ן'=>'n','ס'=>'s','ע'=>'','פ'=>'p','ף'=>'p','צ'=>'ts','ץ'=>'ts','ק'=>'k', - 'ר'=>'r','ש'=>'sh','ת'=>'t', - - // Cyrillic - 'А'=>'A','а'=>'a','Б'=>'B','б'=>'b','В'=>'V','в'=>'v','Г'=>'G','г'=>'g', - 'Д'=>'D','д'=>'d','Е'=>'E','е'=>'e','Ё'=>'E','ё'=>'e','Ж'=>'Z','ж'=>'z', - 'З'=>'Z','з'=>'z','И'=>'I','и'=>'i','Й'=>'J','й'=>'j','К'=>'K','к'=>'k', - 'Л'=>'L','л'=>'l','М'=>'M','м'=>'m','Н'=>'N','н'=>'n','О'=>'O','о'=>'o', - 'П'=>'P','п'=>'p','Р'=>'R','р'=>'r','С'=>'S','с'=>'s','Т'=>'T','т'=>'t', - 'У'=>'U','у'=>'u','Ф'=>'F','ф'=>'f','Х'=>'H','х'=>'h','Ц'=>'C','ц'=>'c', - 'Ч'=>'Ch','ч'=>'ch','Ш'=>'Sh','ш'=>'sh','Щ'=>'Sch','щ'=>'sch','Ъ'=>'','ъ'=>'', - 'Ы'=>'Y','ы'=>'y','Ь'=>'','ь'=>'','Э'=>'E','э'=>'e','Ю'=>'Yu','ю'=>'yu', - 'Я'=>'Ya','я'=>'ya', - - // Ukrainian - 'Є'=>'Ye','є'=>'ye','І'=>'I','і'=>'i','Ї'=>'Yi','ї'=>'yi','Ґ'=>'G','ґ'=>'g', - - // Belarusian - 'Ў'=>'U','ў'=>'u', - - // Serbian / Macedonian extras - 'Ђ'=>'Dj','ђ'=>'dj','Љ'=>'Lj','љ'=>'lj','Њ'=>'Nj','њ'=>'nj','Ћ'=>'C','ћ'=>'c', - 'Џ'=>'Dz','џ'=>'dz', - ]; - - return strtr($s, $map); + // Early return for strings that only contain Latin characters and common punctuation + if (!preg_match('/[^\p{Latin}\p{N}\s\.\,\-\'\/@]/u', $s)) { + return $s; + } + + return strtr($s, self::$transliterationMap); } public static function transliterate_and_limit_length(string $string, int $limit = 127)