From 23ba76246a9c1ffe60082108bb523e06ceb6c848 Mon Sep 17 00:00:00 2001 From: Nguyen Hoang Ky Date: Mon, 9 Feb 2026 14:44:43 +0700 Subject: [PATCH 1/2] add rebuild engine from text --- bamboo.go | 1 + rebuild.go | 97 +++++++++++++++++++++++++++++++++++++++++++ rebuild_test.go | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 rebuild.go create mode 100644 rebuild_test.go diff --git a/bamboo.go b/bamboo.go index f635f38..27c8ae9 100644 --- a/bamboo.go +++ b/bamboo.go @@ -50,6 +50,7 @@ type IEngine interface { RemoveLastChar(bool) RestoreLastWord(bool) Reset() + RebuildEngineFromText(string) } type BambooEngine struct { diff --git a/rebuild.go b/rebuild.go new file mode 100644 index 0000000..f8cc3cf --- /dev/null +++ b/rebuild.go @@ -0,0 +1,97 @@ +/* + * Bamboo - A Vietnamese Input method editor + * Copyright (C) Nguyễn Hoàng Kỳ + * + * This software is licensed under the MIT license. For more information, + * see . + */ + +package bamboo + +import ( + "unicode" +) + +// RebuildCompositionFromText creates a composition (list of Transformations) directly +// from a Vietnamese Unicode string, bypassing all Input Method rules. 
+func RebuildCompositionFromText(text string, stdStyle bool) []*Transformation { + var composition []*Transformation + + for _, ch := range text { + lowerCh := unicode.ToLower(ch) + isUpperCase := unicode.IsUpper(ch) + + // Decompose the character into root + mark + tone + tone := FindToneFromChar(lowerCh) + mark, hasMark := FindMarkFromChar(lowerCh) + + // Get the root character (no tone, no mark) + rootChar := lowerCh + if tone != ToneNone { + rootChar = AddToneToChar(rootChar, 0) + } + if hasMark && mark != MarkNone { + rootChar = AddMarkToChar(rootChar, 0) + } + + // Create the base Appending transformation for the root character + appendTrans := &Transformation{ + IsUpperCase: isUpperCase, + Rule: Rule{ + Key: rootChar, + EffectOn: rootChar, + EffectType: Appending, + Result: rootChar, + }, + } + composition = append(composition, appendTrans) + + if hasMark && mark != MarkNone { + markTrans := &Transformation{ + Target: appendTrans, + Rule: Rule{ + Key: 0, + EffectType: MarkTransformation, + Effect: uint8(mark), + EffectOn: rootChar, + Result: AddMarkToTonelessChar(rootChar, uint8(mark)), + }, + } + composition = append(composition, markTrans) + } + } + + // Apply tones + var lastTone Tone = ToneNone + for _, ch := range text { + lowerCh := unicode.ToLower(ch) + t := FindToneFromChar(lowerCh) + if t != ToneNone { + lastTone = t + } + } + + if lastTone != ToneNone { + toneTarget := findToneTarget(composition, stdStyle) + if toneTarget != nil { + toneTrans := &Transformation{ + Target: toneTarget, + Rule: Rule{ + Key: 0, + EffectType: ToneTransformation, + Effect: uint8(lastTone), + }, + } + composition = append(composition, toneTrans) + } + } + + return composition +} + +// RebuildEngineFromText resets the engine and rebuilds its internal composition +// state from the given Vietnamese Unicode text, bypassing all IM rules. 
+func (e *BambooEngine) RebuildEngineFromText(text string) { + e.Reset() + e.composition = RebuildCompositionFromText(text, e.flags&EstdToneStyle != 0) +} diff --git a/rebuild_test.go b/rebuild_test.go new file mode 100644 index 0000000..771b268 --- /dev/null +++ b/rebuild_test.go @@ -0,0 +1,108 @@ +/* + * Bamboo - A Vietnamese Input method editor + * Copyright (C) Nguyễn Hoàng Kỳ + * + * This software is licensed under the MIT license. For more information, + * see . + */ + +package bamboo + +import ( + "testing" +) + +func TestRebuildFromText_SimpleASCII(t *testing.T) { + composition := RebuildCompositionFromText("goo", true) + result := Flatten(composition, VietnameseMode) + if result != "goo" { + t.Errorf("RebuildFromText('goo') = %q, want %q", result, "goo") + } +} + +func TestRebuildFromText_SimpleVietnamese(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"chào", "chào"}, + {"việt", "việt"}, + {"google", "google"}, + {"đường", "đường"}, + {"người", "người"}, + {"as", "as"}, + {"được", "được"}, + {"những", "những"}, + {"ước", "ước"}, + {"ươi", "ươi"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + composition := RebuildCompositionFromText(tt.input, true) + result := Flatten(composition, VietnameseMode) + if result != tt.want { + t.Errorf("RebuildFromText(%q) = %q, want %q", tt.input, result, tt.want) + } + }) + } +} + +func TestRebuildFromText_UpperCase(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"Việt", "Việt"}, + {"OO", "OO"}, + {"DD", "DD"}, + {"Nội", "Nội"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + composition := RebuildCompositionFromText(tt.input, true) + result := Flatten(composition, VietnameseMode) + if result != tt.want { + t.Errorf("RebuildFromText(%q) = %q, want %q", tt.input, result, tt.want) + } + }) + } +} + +func TestRebuildFromText_ThenProcessKey(t *testing.T) { + im := ParseInputMethod(GetInputMethodDefinitions(), "Telex") 
+ engine := NewEngine(im, EstdFlags) + + engine.RebuildEngineFromText("go") + result := engine.GetProcessedString(VietnameseMode) + if result != "go" { + t.Errorf("After rebuild 'go', GetProcessedString = %q, want %q", result, "go") + } + + engine.ProcessKey('s', VietnameseMode) + result = engine.GetProcessedString(VietnameseMode) + if result != "gó" { + t.Errorf("After rebuild 'go' + ProcessKey('s'), got %q, want %q", result, "gó") + } +} + +func TestRebuildFromText_CompareWithProcessString(t *testing.T) { + im := ParseInputMethod(GetInputMethodDefinitions(), "Telex") + engine := NewEngine(im, EstdFlags) + + engine.ProcessString("goo", VietnameseMode) + buggyResult := engine.GetProcessedString(VietnameseMode) + + engine.Reset() + engine.RebuildEngineFromText("goo") + correctResult := engine.GetProcessedString(VietnameseMode) + + if buggyResult == "goo" { + t.Log("ProcessString('goo') unexpectedly correct - Telex behavior may have changed") + } + if correctResult != "goo" { + t.Errorf("RebuildFromText('goo') = %q, want 'goo'", correctResult) + } + t.Logf("ProcessString('goo') = %q, RebuildFromText('goo') = %q", buggyResult, correctResult) +} From a43b5a416b10de2163aaa545a6656b4309389161 Mon Sep 17 00:00:00 2001 From: Nguyen Hoang Ky Date: Mon, 9 Feb 2026 16:56:49 +0700 Subject: [PATCH 2/2] Fix wrong tone mark when text contains punctuation --- rebuild.go | 56 ++++++++++++++++++++++++++++++++++++++++++++- rebuild_test.go | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/rebuild.go b/rebuild.go index f8cc3cf..d7d906b 100644 --- a/rebuild.go +++ b/rebuild.go @@ -17,7 +17,61 @@ import ( func RebuildCompositionFromText(text string, stdStyle bool) []*Transformation { var composition []*Transformation + // Split text into words by word break symbols (punctuation, spaces, etc.) 
+ words := splitTextIntoWords(text) + + for _, word := range words { + wordComposition := processWordToComposition(word, stdStyle) + composition = append(composition, wordComposition...) + } + + return composition +} + +// splitTextIntoWords splits text into words, preserving delimiters +func splitTextIntoWords(text string) []string { + var words []string + var currentWord []rune + for _, ch := range text { + if IsWordBreakSymbol(ch) { + if len(currentWord) > 0 { + words = append(words, string(currentWord)) + currentWord = nil + } + words = append(words, string(ch)) + } else { + currentWord = append(currentWord, ch) + } + } + + if len(currentWord) > 0 { + words = append(words, string(currentWord)) + } + + return words +} + +// processWordToComposition converts a word into transformations +func processWordToComposition(word string, stdStyle bool) []*Transformation { + var composition []*Transformation + + if len(word) == 1 && IsWordBreakSymbol([]rune(word)[0]) { + ch := []rune(word)[0] + isUpperCase := unicode.IsUpper(ch) + appendTrans := &Transformation{ + IsUpperCase: isUpperCase, + Rule: Rule{ + Key: ch, + EffectOn: ch, + EffectType: Appending, + Result: ch, + }, + } + return []*Transformation{appendTrans} + } + + for _, ch := range word { lowerCh := unicode.ToLower(ch) isUpperCase := unicode.IsUpper(ch) @@ -63,7 +117,7 @@ func RebuildCompositionFromText(text string, stdStyle bool) []*Transformation { // Apply tones var lastTone Tone = ToneNone - for _, ch := range text { + for _, ch := range word { lowerCh := unicode.ToLower(ch) t := FindToneFromChar(lowerCh) if t != ToneNone { diff --git a/rebuild_test.go b/rebuild_test.go index 771b268..8f4503f 100644 --- a/rebuild_test.go +++ b/rebuild_test.go @@ -106,3 +106,64 @@ func TestRebuildFromText_CompareWithProcessString(t *testing.T) { } t.Logf("ProcessString('goo') = %q, RebuildFromText('goo') = %q", buggyResult, correctResult) } + +func TestRebuildFromText_WithPunctuationAndSpace(t *testing.T) { + tests := 
[]struct { + name string + input string + want string + }{ + {"vãi", "vãi", "vãi"}, + {"vãi.", "vãi.", "vãi."}, + {"vãi ", "vãi ", "vãi "}, + {"vãi. ", "vãi. ", "vãi. "}, + {"chào.", "chào.", "chào."}, + {"chào. ", "chào. ", "chào. "}, + {"chào, ", "chào, ", "chào, "}, + {"chào. Xin", "chào. Xin", "chào. Xin"}, + {"vãi, ", "vãi, ", "vãi, "}, + {"vãi! ", "vãi! ", "vãi! "}, + {"vãi? ", "vãi? ", "vãi? "}, + {"vãi; ", "vãi; ", "vãi; "}, + {"vãi: ", "vãi: ", "vãi: "}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + composition := RebuildCompositionFromText(tt.input, true) + result := Flatten(composition, VietnameseMode) + if result != tt.want { + t.Errorf("RebuildFromText(%q) = %q, want %q", tt.input, result, tt.want) + } + }) + } +} + +// Test RebuildEngineFromText with punctuation and space +func TestRebuildEngineFromText_WithPunctuationAndSpace(t *testing.T) { + im := ParseInputMethod(GetInputMethodDefinitions(), "Telex") + engine := NewEngine(im, EstdFlags) + + tests := []struct { + name string + input string + want string + }{ + {"vãi", "vãi", "vãi"}, + {"vãi.", "vãi.", "vãi."}, + {"vãi ", "vãi ", "vãi "}, + {"vãi. ", "vãi. ", "vãi. "}, + {"chào.", "chào.", "chào."}, + {"chào. ", "chào. ", "chào. "}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + engine.RebuildEngineFromText(tt.input) + result := engine.GetProcessedString(VietnameseMode | FullText) + if result != tt.want { + t.Errorf("RebuildEngineFromText(%q) = %q, want %q", tt.input, result, tt.want) + } + }) + } +}