// isMn reports whether r is a nonspacing combining mark (Unicode category Mn)
// other than the marks used by Vietnamese orthography.
//
// The Vietnamese tone marks and vowel modifiers listed below are exempt and
// yield false, so that callers stripping "Mn" runes keep them. Every other Mn
// rune yields true, and runes outside category Mn always yield false.
func isMn(r rune) bool {
	// Every rune listed here is itself in category Mn, so checking the
	// exemption list first gives the same result as checking the category
	// first, without building a lookup slice on each call.
	switch r {
	case '\u0300', // combining grave accent (huyền tone)
		'\u0301', // combining acute accent (sắc tone)
		'\u0303', // combining tilde (ngã tone)
		'\u0309', // combining hook above (hỏi tone)
		'\u0323', // combining dot below (nặng tone)
		'\u0302', // combining circumflex (â, ê, ô)
		'\u031B', // combining horn (ơ, ư)
		'\u0306': // combining breve (ă)
		return false
	}
	return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}