From 92c80a89abbbd470adfda239352d74129439ccde Mon Sep 17 00:00:00 2001 From: rampaa Date: Fri, 23 Aug 2024 21:06:15 +0300 Subject: [PATCH] Minor --- JL.Core/Utilities/ExtensionMethods.cs | 27 +++++++++++++++++---------- JL.Core/Utilities/JapaneseUtils.cs | 12 ++++++------ JL.Core/Utilities/TextUtils.cs | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/JL.Core/Utilities/ExtensionMethods.cs b/JL.Core/Utilities/ExtensionMethods.cs index 387c238f..63d88584 100644 --- a/JL.Core/Utilities/ExtensionMethods.cs +++ b/JL.Core/Utilities/ExtensionMethods.cs @@ -39,22 +39,29 @@ public static T GetEnum(this string description) where T : struct, Enum // return default; } - internal static List ListUnicodeCharacters(this string s) + internal static List ListUnicodeCharacters(this ReadOnlySpan text) { - List textBlocks = new(s.Length); - for (int i = 0; i < s.Length; i++) + List textBlocks = new(text.Length); + for (int i = 0; i < text.Length; i++) { - if (char.IsHighSurrogate(s, i) - && s.Length > i + 1 - && char.IsLowSurrogate(s, i + 1)) + char highSurrogateCandidate = text[i]; + if (char.IsHighSurrogate(highSurrogateCandidate) + && text.Length > i + 1) { - textBlocks.Add(char.ConvertFromUtf32(char.ConvertToUtf32(s, i))); - - ++i; + char lowSurragateCandidate = text[i + 1]; + if (char.IsLowSurrogate(lowSurragateCandidate)) + { + textBlocks.Add(char.ConvertFromUtf32(char.ConvertToUtf32(highSurrogateCandidate, lowSurragateCandidate))); + ++i; + } + else + { + textBlocks.Add(highSurrogateCandidate.ToString()); + } } else { - textBlocks.Add(s[i].ToString()); + textBlocks.Add(highSurrogateCandidate.ToString()); } } diff --git a/JL.Core/Utilities/JapaneseUtils.cs b/JL.Core/Utilities/JapaneseUtils.cs index 28b2e8ed..7ea734ee 100644 --- a/JL.Core/Utilities/JapaneseUtils.cs +++ b/JL.Core/Utilities/JapaneseUtils.cs @@ -174,7 +174,7 @@ public static partial class JapaneseUtils private static readonly FrozenSet s_expressionTerminatingCharacters = s_leftToRightBracketDict.Keys.Union(s_leftToRightBracketDict.Values).Union(s_sentenceTerminatingCharacters).ToFrozenSet(); - private static int FirstKatakanaIndex(string text) + private static int FirstKatakanaIndex(ReadOnlySpan text) { int textLength = text.Length; for (int i = 0; i < textLength; i++) @@ -219,7 +219,7 @@ public static string KatakanaToHiragana(string text) return textInHiragana.ToString(); } - internal static List LongVowelMarkToKana(string text) + internal static List LongVowelMarkToKana(ReadOnlySpan text) { List unicodeTextList = text.ListUnicodeCharacters(); @@ -278,7 +278,7 @@ internal static List LongVowelMarkToKana(string text) return stringBuilders.ConvertAll(static sb => sb.ToString()); } - public static List CreateCombinedForm(string text) + public static List CreateCombinedForm(ReadOnlySpan text) { List combinedForm = new(text.Length); @@ -300,12 +300,12 @@ public static List CreateCombinedForm(string text) return combinedForm; } - internal static bool IsKatakana(string text) + internal static bool IsKatakana(ReadOnlySpan text) { return s_katakanaToHiraganaDict.ContainsKey(text[0]); } - public static int FindExpressionBoundary(string text, int position) + public static int FindExpressionBoundary(ReadOnlySpan text, int position) { int endPosition = text.Length; for (int i = position; i < text.Length; i++) @@ -412,7 +412,7 @@ internal static string FindSentence(string text, int position) return sentence; } - private static int FirstPunctuationIndex(string text) + private static int FirstPunctuationIndex(ReadOnlySpan text) { int charIndex = 0; foreach (Rune rune in text.EnumerateRunes()) diff --git a/JL.Core/Utilities/TextUtils.cs b/JL.Core/Utilities/TextUtils.cs index 409526d7..0ee0c70a 100644 --- a/JL.Core/Utilities/TextUtils.cs +++ b/JL.Core/Utilities/TextUtils.cs @@ -6,7 +6,7 @@ namespace JL.Core.Utilities; public static class TextUtils { - private static int FirstInvalidUnicodeSequenceIndex(string text) + private static int FirstInvalidUnicodeSequenceIndex(ReadOnlySpan text) { for (int i = 0; i < text.Length; i++) {