Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bamboo.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ type IEngine interface {
RemoveLastChar(bool)
RestoreLastWord(bool)
Reset()
RebuildEngineFromText(string)
}

type BambooEngine struct {
Expand Down
151 changes: 151 additions & 0 deletions rebuild.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
* Bamboo - A Vietnamese Input method editor
* Copyright (C) Nguyễn Hoàng Kỳ <nhktmdzhg@gmail.com>
*
* This software is licensed under the MIT license. For more information,
* see <https://github.com/BambooEngine/bamboo-core/blob/master/LICENSE>.
*/

package bamboo

import (
"unicode"
)

// RebuildCompositionFromText builds a composition (a list of Transformations)
// straight from a Vietnamese Unicode string instead of replaying key strokes
// through an input method.
//
// The text is cut into tokens by splitTextIntoWords, every token is converted
// by processWordToComposition, and the per-token results are concatenated in
// their original order. stdStyle is handed unchanged to
// processWordToComposition. The result is nil when no token produced any
// transformation.
func RebuildCompositionFromText(text string, stdStyle bool) []*Transformation {
	var result []*Transformation
	for _, token := range splitTextIntoWords(text) {
		result = append(result, processWordToComposition(token, stdStyle)...)
	}
	return result
}

// splitTextIntoWords cuts text into tokens without discarding anything.
//
// Every rune for which IsWordBreakSymbol reports true becomes a token of its
// own; each maximal run of other runes between two such symbols becomes one
// token. Tokens are returned in the order they occur in text, and the result
// is nil for an empty string.
func splitTextIntoWords(text string) []string {
	var (
		tokens  []string
		pending []rune
	)

	// flush emits the runes gathered so far as a single token, if there are any.
	flush := func() {
		if len(pending) == 0 {
			return
		}
		tokens = append(tokens, string(pending))
		pending = nil
	}

	for _, r := range text {
		if !IsWordBreakSymbol(r) {
			pending = append(pending, r)
			continue
		}
		// A delimiter closes the word in progress and is kept as its own token.
		flush()
		tokens = append(tokens, string(r))
	}
	flush()

	return tokens
}

// processWordToComposition converts one token produced by splitTextIntoWords
// into the transformations that represent it.
//
// A token made of exactly one word-break symbol becomes a single Appending
// transformation for that symbol. Any other token is decomposed rune by rune:
// each rune yields an Appending transformation for its lower-cased root
// character (tone and mark removed, case recorded in IsUpperCase), followed by
// a MarkTransformation targeting it when the rune carries a mark.
//
// At most one ToneTransformation is appended for the whole word, using the
// last tone found in it. The tone is attached to the transformation selected
// by findToneTarget; when that returns nil the tone is attached to the
// character that carried it in the input, so the tone is never silently lost.
//
// stdStyle is forwarded unchanged to findToneTarget.
func processWordToComposition(word string, stdStyle bool) []*Transformation {
	// Convert once; the rune count (not the byte length) decides whether the
	// token is a lone symbol.
	runes := []rune(word)

	if len(runes) == 1 && IsWordBreakSymbol(runes[0]) {
		ch := runes[0]
		return []*Transformation{{
			IsUpperCase: unicode.IsUpper(ch),
			Rule: Rule{
				Key:        ch,
				EffectOn:   ch,
				EffectType: Appending,
				Result:     ch,
			},
		}}
	}

	var (
		composition []*Transformation
		lastTone    Tone = ToneNone
		toneCarrier *Transformation // Appending transformation of the rune that carried lastTone
	)

	for _, ch := range runes {
		lowerCh := unicode.ToLower(ch)

		// Decompose the character into root + mark + tone.
		tone := FindToneFromChar(lowerCh)
		mark, hasMark := FindMarkFromChar(lowerCh)
		marked := hasMark && mark != MarkNone

		// Strip tone first, then mark, to obtain the bare root character.
		rootChar := lowerCh
		if tone != ToneNone {
			rootChar = AddToneToChar(rootChar, 0)
		}
		if marked {
			rootChar = AddMarkToChar(rootChar, 0)
		}

		appendTrans := &Transformation{
			IsUpperCase: unicode.IsUpper(ch),
			Rule: Rule{
				Key:        rootChar,
				EffectOn:   rootChar,
				EffectType: Appending,
				Result:     rootChar,
			},
		}
		composition = append(composition, appendTrans)

		if marked {
			// Key 0: the mark comes from the text, not from a typed key.
			composition = append(composition, &Transformation{
				Target: appendTrans,
				Rule: Rule{
					Key:        0,
					EffectType: MarkTransformation,
					Effect:     uint8(mark),
					EffectOn:   rootChar,
					Result:     AddMarkToTonelessChar(rootChar, uint8(mark)),
				},
			})
		}

		// Remember the most recent tone and where it was found.
		if tone != ToneNone {
			lastTone = tone
			toneCarrier = appendTrans
		}
	}

	if lastTone == ToneNone {
		return composition
	}

	toneTarget := findToneTarget(composition, stdStyle)
	if toneTarget == nil {
		// No target could be determined for this word; keep the tone on the
		// character it was read from rather than dropping it.
		toneTarget = toneCarrier
	}

	return append(composition, &Transformation{
		Target: toneTarget,
		Rule: Rule{
			Key:        0,
			EffectType: ToneTransformation,
			Effect:     uint8(lastTone),
		},
	})
}

// RebuildEngineFromText discards the engine's current state via Reset and then
// replaces its composition with one built from text by
// RebuildCompositionFromText. The standard tone style is requested when the
// EstdToneStyle bit is set in the engine flags.
func (e *BambooEngine) RebuildEngineFromText(text string) {
	e.Reset()

	// Read the flag only after Reset, exactly where the composition is rebuilt.
	useStdStyle := e.flags&EstdToneStyle != 0
	e.composition = RebuildCompositionFromText(text, useStdStyle)
}
169 changes: 169 additions & 0 deletions rebuild_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/*
* Bamboo - A Vietnamese Input method editor
* Copyright (C) Nguyễn Hoàng Kỳ <nhktmdzhg@gmail.com>
*
* This software is licensed under the MIT license. For more information,
* see <https://github.com/BambooEngine/bamboo-core/blob/master/LICENSE>.
*/

package bamboo

import (
"testing"
)

func TestRebuildFromText_SimpleASCII(t *testing.T) {
composition := RebuildCompositionFromText("goo", true)
result := Flatten(composition, VietnameseMode)
if result != "goo" {
t.Errorf("RebuildFromText('goo') = %q, want %q", result, "goo")
}
}

func TestRebuildFromText_SimpleVietnamese(t *testing.T) {
tests := []struct {
input string
want string
}{
{"chào", "chào"},
{"việt", "việt"},
{"google", "google"},
{"đường", "đường"},
{"người", "người"},
{"as", "as"},
{"được", "được"},
{"những", "những"},
{"ước", "ước"},
{"ươi", "ươi"},
}

for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
composition := RebuildCompositionFromText(tt.input, true)
result := Flatten(composition, VietnameseMode)
if result != tt.want {
t.Errorf("RebuildFromText(%q) = %q, want %q", tt.input, result, tt.want)
}
})
}
}

func TestRebuildFromText_UpperCase(t *testing.T) {
tests := []struct {
input string
want string
}{
{"Việt", "Việt"},
{"OO", "OO"},
{"DD", "DD"},
{"Nội", "Nội"},
}

for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
composition := RebuildCompositionFromText(tt.input, true)
result := Flatten(composition, VietnameseMode)
if result != tt.want {
t.Errorf("RebuildFromText(%q) = %q, want %q", tt.input, result, tt.want)
}
})
}
}

// TestRebuildFromText_ThenProcessKey verifies that an engine rebuilt from text
// can keep accepting key strokes: after rebuilding "go" on a Telex engine, the
// key 's' is expected to turn the processed string into "gó".
func TestRebuildFromText_ThenProcessKey(t *testing.T) {
	telex := ParseInputMethod(GetInputMethodDefinitions(), "Telex")
	eng := NewEngine(telex, EstdFlags)

	eng.RebuildEngineFromText("go")
	if got := eng.GetProcessedString(VietnameseMode); got != "go" {
		t.Errorf("After rebuild 'go', GetProcessedString = %q, want %q", got, "go")
	}

	eng.ProcessKey('s', VietnameseMode)
	if got := eng.GetProcessedString(VietnameseMode); got != "gó" {
		t.Errorf("After rebuild 'go' + ProcessKey('s'), got %q, want %q", got, "gó")
	}
}

func TestRebuildFromText_CompareWithProcessString(t *testing.T) {
im := ParseInputMethod(GetInputMethodDefinitions(), "Telex")
engine := NewEngine(im, EstdFlags)

engine.ProcessString("goo", VietnameseMode)
buggyResult := engine.GetProcessedString(VietnameseMode)

engine.Reset()
engine.RebuildEngineFromText("goo")
correctResult := engine.GetProcessedString(VietnameseMode)

if buggyResult == "goo" {
t.Log("ProcessString('goo') unexpectedly correct - Telex behavior may have changed")
}
if correctResult != "goo" {
t.Errorf("RebuildFromText('goo') = %q, want 'goo'", correctResult)
}
t.Logf("ProcessString('goo') = %q, RebuildFromText('goo') = %q", buggyResult, correctResult)
}

func TestRebuildFromText_WithPunctuationAndSpace(t *testing.T) {
tests := []struct {
name string
input string
want string
}{
{"vãi", "vãi", "vãi"},
{"vãi.", "vãi.", "vãi."},
{"vãi ", "vãi ", "vãi "},
{"vãi. ", "vãi. ", "vãi. "},
{"chào.", "chào.", "chào."},
{"chào. ", "chào. ", "chào. "},
{"chào, ", "chào, ", "chào, "},
{"chào. Xin", "chào. Xin", "chào. Xin"},
{"vãi, ", "vãi, ", "vãi, "},
{"vãi! ", "vãi! ", "vãi! "},
{"vãi? ", "vãi? ", "vãi? "},
{"vãi; ", "vãi; ", "vãi; "},
{"vãi: ", "vãi: ", "vãi: "},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
composition := RebuildCompositionFromText(tt.input, true)
result := Flatten(composition, VietnameseMode)
if result != tt.want {
t.Errorf("RebuildFromText(%q) = %q, want %q", tt.input, result, tt.want)
}
})
}
}

// TestRebuildEngineFromText_WithPunctuationAndSpace drives the same kind of
// input through a Telex engine: every case calls RebuildEngineFromText on one
// shared engine and compares GetProcessedString(VietnameseMode | FullText)
// with the expected text. Each case runs as its own subtest.
func TestRebuildEngineFromText_WithPunctuationAndSpace(t *testing.T) {
	telex := ParseInputMethod(GetInputMethodDefinitions(), "Telex")
	eng := NewEngine(telex, EstdFlags)

	cases := []struct {
		name  string
		input string
		want  string
	}{
		{"vãi", "vãi", "vãi"},
		{"vãi.", "vãi.", "vãi."},
		{"vãi ", "vãi ", "vãi "},
		{"vãi. ", "vãi. ", "vãi. "},
		{"chào.", "chào.", "chào."},
		{"chào. ", "chào. ", "chào. "},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			eng.RebuildEngineFromText(tc.input)
			got := eng.GetProcessedString(VietnameseMode | FullText)
			if got != tc.want {
				t.Errorf("RebuildEngineFromText(%q) = %q, want %q", tc.input, got, tc.want)
			}
		})
	}
}