-
Notifications
You must be signed in to change notification settings - Fork 1
/
pinyin.go
75 lines (64 loc) · 1.35 KB
/
pinyin.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
package main
import (
"encoding/json"
"io"
"io/ioutil"
"os"
"strings"
"github.com/hermanschaaf/cedict"
)
// extractTone splits a numbered pinyin syllable into its letters and its tone
// number, e.g. "dong1" => "dong", 1.
//
// The tone is taken from the final byte of p. When p is empty, or when its
// final byte is not one of the ASCII digits '0' through '5', p is returned
// unchanged together with a tone of 0.
func extractTone(p string) (string, int) {
	// Guard against the empty string: indexing p[len(p)-1] would panic.
	if p == "" {
		return p, 0
	}
	last := len(p) - 1
	// Convert to int before subtracting so bytes below '0' become negative
	// instead of wrapping around as an unsigned byte.
	tone := int(p[last]) - '0'
	if tone < 0 || tone > 5 {
		return p, 0
	}
	return p[:last], tone
}
// loadCEDict reads dictionary entries from file and builds a table mapping
// each lower-cased, tone-stripped pinyin syllable to a string holding every
// distinct simplified character seen with that syllable, in order of first
// appearance.
//
// Entries whose number of pinyin syllables differs from their number of
// simplified characters are skipped, because syllables are paired with
// characters by position. If the reader reports an error other than
// cedict.NoMoreEntries, the table built so far is returned together with
// that error.
func loadCEDict(file io.Reader) (map[string]string, error) {
	pronDict := map[string]string{}
	c := cedict.New(file)
	for {
		switch err := c.NextEntry(); {
		case err == cedict.NoMoreEntries:
			return pronDict, nil
		case err != nil:
			return pronDict, err
		}

		entry := c.Entry()
		chars := []rune(entry.Simplified)
		// strings.Fields never yields empty syllables, so repeated or stray
		// whitespace in the pinyin cannot hand extractTone an empty string.
		syllables := strings.Fields(entry.Pinyin)

		// Skip entries where the pinyin does not line up with the characters.
		if len(syllables) != len(chars) {
			continue
		}

		for i, syl := range syllables {
			// Only the syllable is needed; the tone number is discarded.
			s, _ := extractTone(syl)
			s = strings.ToLower(s)
			if !strings.ContainsRune(pronDict[s], chars[i]) {
				pronDict[s] += string(chars[i])
			}
		}
	}
}
// main opens "cedict_ts.u8", converts it into the syllable-to-characters table
// with loadCEDict, and writes that table as JSON to "sounds.json" with mode
// 0644. Any failure along the way aborts the program with a panic carrying
// the underlying error.
func main() {
	// abortOn panics with e when e is non-nil.
	abortOn := func(e error) {
		if e != nil {
			panic(e)
		}
	}

	src, err := os.Open("cedict_ts.u8")
	abortOn(err)
	defer src.Close()

	sounds, err := loadCEDict(src)
	abortOn(err)

	encoded, err := json.Marshal(sounds)
	abortOn(err)

	abortOn(ioutil.WriteFile("sounds.json", encoded, 0644))
}