Skip to content

Commit

Permalink
Merge pull request #396 from voduylinhnashtech/main
Browse files Browse the repository at this point in the history
Update the isMn func to retain the tonal marks and diacritics used in Vietnamese, such as the circumflex and the horn on ơ and ư.
  • Loading branch information
kercre123 authored Oct 10, 2024
2 parents 3633ed4 + e4e6bd5 commit 52e5f0f
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion chipper/pkg/wirepod/ttr/kgsim.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,27 @@ func Remember(user, ai openai.ChatCompletionMessage, esn string) {
}

// isMn reports whether r is a nonspacing mark (Unicode category Mn) that
// should be removed. The combining marks Vietnamese relies on are exempt and
// yield false, so they are retained; every other Mn rune yields true. Runes
// outside category Mn always yield false.
func isMn(r rune) bool {
	// Every rune listed here is itself in category Mn, so excluding them
	// before the category test is the same as testing the category first
	// and then carving out the exceptions.
	switch r {
	case '\u0300', // grave accent (huyền tone)
		'\u0301', // acute accent (sắc tone)
		'\u0303', // tilde (ngã tone)
		'\u0309', // hook above (hỏi tone)
		'\u0323', // dot below (nặng tone)
		'\u0302', // circumflex (â, ê, ô)
		'\u031B', // horn (ơ, ư)
		'\u0306': // breve (ă)
		return false
	}
	return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}

func removeSpecialCharacters(str string) string {
Expand Down

0 comments on commit 52e5f0f

Please sign in to comment.