-
Notifications
You must be signed in to change notification settings - Fork 0
/
interpreter.go
239 lines (225 loc) · 5.43 KB
/
interpreter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
package prg2p
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
)
// errScan is the error scan returns when it is handed a nil io.Reader.
var errScan = errors.New("scanning error on nil interface")
// variable is one assignment statement: the name on the left-hand side of the
// "=" operator and the comma-separated values on the right-hand side.
//
// Examples:
// ALL = a, b, c, d ... n
// SA1 = a, e, y, u, o
//
// NOTE(review): nothing in this file constructs a variable; the interpreter
// stores assignments in a map instead. Confirm whether this type is still
// needed elsewhere in the package.
type variable struct {
// name is the identifier being assigned to.
name string
// value holds the assigned values, one entry per list item.
value []string
}
// rule is one parsed rule statement. A rule line carries four tab-separated
// columns, in this order:
//   - the left context
//   - the source letter
//   - the right context
//   - the target phoneme(s)
//
// Examples:
// PUSTY ś -(b) si
// PUSTY n (ni, ci, dzi) n, ni
// (a, e) u PUSTY l_
type rule struct {
	left, right []string // contexts on either side of the source letter
	source      string   // letter the rule applies to
	target      []string // phoneme(s) the source letter maps to
}
// interpreter interprets G2P rules. It accumulates the two kinds of statement
// found in a rule file: variable assignments and rule statements.
type interpreter struct {
// vars maps a variable name to its values, e.g. key ALL, values a, b, c ... z.
vars map[string][]string
// rules holds the parsed rule statements in the order they were appended.
rules []rule
}
// newInterpreter returns an interpreter with an empty, writable variable
// table and no rules. Feed it G2P definitions through scan.
func newInterpreter() *interpreter {
	return &interpreter{vars: map[string][]string{}}
}
// scan reads G2P definitions from r line by line and passes each line to
// eval, populating the interpreter's variables and rules.
//
// Surrounding whitespace is stripped from every line before it is evaluated.
//
// scan returns errScan when r is nil, an error wrapping the eval failure for
// the first line that cannot be evaluated, or an error wrapping the read
// failure reported by the underlying bufio.Scanner.
func (i *interpreter) scan(r io.Reader) error {
	if r == nil {
		return errScan
	}
	s := bufio.NewScanner(r)
	for s.Scan() {
		l := strings.TrimSpace(s.Text())
		if err := i.eval(l); err != nil {
			// Keep the cause in the chain so callers can see why the line
			// was rejected, not only which line it was.
			return fmt.Errorf("could not evaluate %s: %w", l, err)
		}
	}
	// Scan also returns false on a read failure or an over-long line; without
	// this check a truncated rule set would be accepted as complete.
	if err := s.Err(); err != nil {
		return fmt.Errorf("could not read rules: %w", err)
	}
	return nil
}
// eval dispatches a single line: empty lines and lines starting with "#" are
// ignored, a line containing "=" is handled as a variable assignment, and
// anything else is handled as a rule statement. The error of the chosen
// handler is returned unchanged.
func (i *interpreter) eval(l string) error {
	switch {
	case l == "", strings.HasPrefix(l, "#"):
		return nil
	case strings.Contains(l, "="):
		return i.asVar(l)
	default:
		return i.asRule(l)
	}
}
// asVar evaluates a line as a variable assignment of the form
//
//	NAME = value, value, ...
//
// and stores the trimmed values under the trimmed NAME in i.vars, replacing
// any earlier assignment to the same name.
//
// It returns an error when the line does not contain exactly one "=", when
// the name is empty, or when nothing follows the "=".
func (i *interpreter) asVar(l string) error {
	sp := strings.Split(l, "=")
	if len(sp) != 2 {
		return fmt.Errorf("multiple assignments on one line %s", l)
	}
	name := strings.TrimSpace(sp[0])
	if name == "" {
		// Without this check the values would be stored under the key "",
		// which no later lookup is meant to reference.
		return fmt.Errorf("no variable name to assign to on line %s", l)
	}
	vals := strings.Split(sp[1], ",")
	if len(vals) == 1 && strings.TrimSpace(vals[0]) == "" {
		return fmt.Errorf("no values to assign to variable on line %s", l)
	}
	// The index is named k so that it does not shadow the receiver i.
	for k := range vals {
		vals[k] = strings.TrimSpace(vals[k])
	}
	if name == "ALL" {
		// ALL always carries an extra "$" entry.
		// NOTE(review): presumably a boundary marker — confirm against the
		// code that consumes the rules.
		vals = append(vals, "$")
	}
	i.vars[name] = vals
	return nil
}
// asRule evaluates a line as a rule statement and appends the result to
// i.rules.
//
// The line must consist of exactly four tab-separated columns: left context,
// source letter, right context and a comma-separated list of targets. Both
// contexts are resolved through context; a nil context is accepted, while a
// non-nil context without any entries is rejected.
func (i *interpreter) asRule(l string) error {
	fields := strings.Split(l, "\t")
	if len(fields) != 4 {
		return fmt.Errorf("expected 4 splits in %s", l)
	}

	before, err := i.context(fields[0])
	if err != nil {
		return err
	}
	after, err := i.context(fields[2])
	if err != nil {
		return err
	}

	// Trim the targets in place; Split always yields at least one entry.
	phonemes := strings.Split(fields[3], ",")
	for k := range phonemes {
		phonemes[k] = strings.TrimSpace(phonemes[k])
	}

	switch {
	case before != nil && len(before) == 0:
		return fmt.Errorf("empty left context in line %s", l)
	case after != nil && len(after) == 0:
		return fmt.Errorf("empty right context in line %s", l)
	}

	i.rules = append(i.rules, rule{
		left:   before,
		right:  after,
		source: fields[1],
		target: phonemes,
	})
	return nil
}
// context resolves the textual left/right context v of a rule into the list
// of values it stands for.
//
// If v names a variable whose values join to exactly "*", context returns
// nil, nil. Otherwise the variable "ALL" must be set; a v that starts with
// "-" is resolved by asDifference and any other v by asConstraint.
func (i *interpreter) context(v string) ([]string, error) {
	if vals, found := i.vars[v]; found && strings.Join(vals, "") == "*" {
		return nil, nil
	}
	// "ALL" is the base slice to trim.
	if _, found := i.vars["ALL"]; !found {
		return nil, fmt.Errorf("variable \"ALL\" not set")
	}

	resolve := i.asConstraint
	if strings.HasPrefix(v, "-") {
		resolve = i.asDifference
	}
	vals, err := resolve(v)
	if err != nil {
		return nil, err
	}
	return vals, nil
}
// asDifference returns the values of ALL with the unwanted ones removed.
//
// Two notations are understood:
//   - "-(a, b)" removes the listed literal values;
//   - "-X-Y" removes the values of the variables X and Y, each of which must
//     have been assigned.
func (i *interpreter) asDifference(v string) ([]string, error) {
	all := i.vars["ALL"]

	if !strings.HasPrefix(v, "-(") {
		// Variable form: subtract every referenced variable in turn.
		remaining := all
		for _, name := range strings.Split(v, "-")[1:] {
			drop, found := i.vars[name]
			if !found {
				return nil, fmt.Errorf("variable \"%s\" not found", name)
			}
			remaining = rm(remaining, drop)
		}
		return remaining, nil
	}

	// Literal form: the list sits between "-(" and the closing ")".
	if !strings.HasSuffix(v, ")") {
		return nil, fmt.Errorf("expected \")\" in line %s", v)
	}
	unwanted := strings.Split(v[2:len(v)-1], ",")
	for k := range unwanted {
		unwanted[k] = strings.TrimSpace(unwanted[k])
	}
	return rm(all, unwanted), nil
}
// asConstraint returns the explicit set of values that v stands for.
//
// Two notations are understood:
//   - "(a, b)" yields the listed literal values;
//   - "X+Y" yields the values of variable X followed by those of variable Y,
//     each of which must have been assigned. Duplicates are not removed.
func (i *interpreter) asConstraint(v string) ([]string, error) {
	if strings.HasPrefix(v, "(") {
		if !strings.HasSuffix(v, ")") {
			return nil, fmt.Errorf("expected \")\" in line %s", v)
		}
		// Trim the listed values in place.
		wanted := strings.Split(v[1:len(v)-1], ",")
		for k := range wanted {
			wanted[k] = strings.TrimSpace(wanted[k])
		}
		return wanted, nil
	}

	var combined []string
	for _, name := range strings.Split(v, "+") {
		members, found := i.vars[name]
		if !found {
			return nil, fmt.Errorf("variable \"%s\" not found", name)
		}
		combined = append(combined, members...)
	}
	return combined, nil
}
// rm returns the elements of s1 that do not occur in s2, in their original
// order. Neither argument is modified.
//
// The result is always a non-nil slice, even when nothing is left. This
// matters to the callers in this file: context reports "no constraint" with a
// nil slice and asRule rejects a non-nil slice without entries, so returning
// nil here would turn a difference that removes everything into an
// unconstrained context instead of an error.
func rm(s1, s2 []string) []string {
	// Set of values to drop, for constant-time membership tests.
	drop := make(map[string]struct{}, len(s2))
	for _, elem := range s2 {
		drop[elem] = struct{}{}
	}
	out := make([]string, 0, len(s1))
	for _, elem := range s1 {
		if _, found := drop[elem]; !found {
			out = append(out, elem)
		}
	}
	return out
}